diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b5dbefef4e..5c84e856d5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -279,7 +279,6 @@ jobs: - aarch64-unknown-linux-gnu - aarch64_be-unknown-linux-gnu - armv7-unknown-linux-gnueabihf - - arm-unknown-linux-gnueabihf - x86_64-unknown-linux-gnu profile: [dev, release] include: diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 2768c521eb..e2b3d95585 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,17 +1,21 @@ FROM ubuntu:25.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ gcc-aarch64-linux-gnu \ - g++-aarch64-linux-gnu \ libc6-dev-arm64-cross \ qemu-user \ make \ file \ - clang \ - lld + xz-utils \ + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ diff --git a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index f85c6a2592..d7c12493ad 100644 --- a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -2,17 +2,15 @@ FROM ubuntu:25.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ libc6-dev-arm64-cross \ qemu-user \ make \ file \ - clang \ curl \ xz-utils \ - lld + wget ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu" @@ -21,6 +19,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin RUN tar -xvf 
"${TOOLCHAIN}.tar.xz" RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}" ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index 6d4ff24828..23e4d5a341 100644 --- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -7,7 +7,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libc6-dev-armhf-cross \ qemu-user \ make \ - file + file \ + clang \ + lld ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 602249c0ec..02744917af 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,17 +1,21 @@ FROM ubuntu:24.04 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ gcc-arm-linux-gnueabihf \ - g++-arm-linux-gnueabihf \ libc6-dev-armhf-cross \ qemu-user \ make \ file \ - clang \ - lld + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ 
OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 17c6d25215..17d1ac67e7 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -6,15 +6,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ make \ ca-certificates \ wget \ - xz-utils \ - clang \ - libstdc++-14-dev \ - build-essential \ - lld + xz-utils RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ -rtm-mode full -tsx --" diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 89104e2672..5c17d94298 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -5,127 +5,56 @@ set -ex : "${TARGET?The TARGET environment variable must be set.}" export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" -export HOST_RUSTFLAGS="${RUSTFLAGS}" export PROFILE="${PROFILE:="release"}" -case ${TARGET} in - # On 32-bit use a static relocation model which avoids some extra - # instructions when dealing with static data, notably allowing some - # instruction assertion checks to pass below the 20 instruction limit. If - # this is the default, dynamic, then too many instructions are generated - # when we assert the instruction for a function and it causes tests to fail. 
- i686-* | i586-*) - export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static" - ;; - # Some x86_64 targets enable by default more features beyond SSE2, - # which cause some instruction assertion checks to fail. - x86_64-*) - export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3" - ;; - #Unoptimized build uses fast-isel which breaks with msa - mips-* | mipsel-*) - export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false" - ;; - armv7-*eabihf | thumbv7-*eabihf) - export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon" - ;; - # Some of our test dependencies use the deprecated `gcc` crates which - # doesn't detect RISC-V compilers automatically, so do it manually here. - riscv*) - export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc" - ;; -esac - echo "RUSTFLAGS=${RUSTFLAGS}" -echo "OBJDUMP=${OBJDUMP}" echo "PROFILE=${PROFILE}" INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" -# Test targets compiled with extra features. +export CC="clang" + case ${TARGET} in - # Setup aarch64 & armv7 specific variables, the runner, along with some - # tests to skip - aarch64-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" + aarch64_be*) + export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/include/ --sysroot=${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt ;; - aarch64_be-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}" - : 
"${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" + aarch64*) + export CFLAGS="-I/usr/aarch64-linux-gnu/include/" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt ;; - armv7-unknown-linux-gnueabihf*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/" + armv7*) + export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" ;; - x86_64-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/" - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" + x86_64*) + export CFLAGS="-I/usr/include/x86_64-linux-gnu/" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=20}" ;; *) ;; esac -# Arm specific case "${TARGET}" in - aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ - cargo run "${INTRINSIC_TEST}" --release \ - --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --runner "${TEST_RUNNER}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" - ;; - - aarch64_be-unknown-linux-gnu*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ - cargo run "${INTRINSIC_TEST}" --release \ - --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --runner "${TEST_RUNNER}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \ - --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" \ - 
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" - ;; - x86_64-unknown-linux-gnu*) - # `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER` is not necessary for `intrinsic-test` - # because the binary needs to run directly on the host. - # Hence the use of `env -u`. env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \ - RUST_LOG=warn RUST_BACKTRACE=1 \ cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ - --runner "${TEST_RUNNER}" \ --skip "${TEST_SKIP_INTRINSICS}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" + --target "${TARGET}" + + echo "${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" ;; - *) + *) + cargo run "${INTRINSIC_TEST}" --release \ + --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ + --skip "${TEST_SKIP_INTRINSICS}" \ + --target "${TARGET}" ;; esac + +cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index fef672cab9..7780c9474f 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -1029,6 +1029,7 @@ pub fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { #[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1045,8 +1046,33 @@ pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vcadd_rot270_f16(a, b) } } #[doc = "Floating-point complex 
add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v4f16" + )] + fn _vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcadd_rot270_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1063,8 +1089,33 @@ pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vcaddq_rot270_f16(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> 
float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v8f16" + )] + fn _vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcaddq_rot270_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1079,8 +1130,31 @@ pub fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { _vcadd_rot270_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32" + )] + fn _vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vcadd_rot270_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1095,8 +1169,31 @@ pub fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vcaddq_rot270_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32" + )] + fn _vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcaddq_rot270_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1111,8 +1208,31 @@ pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { _vcaddq_rot270_f64(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64" + )] + fn _vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vcaddq_rot270_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1129,8 +1249,33 @@ pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vcadd_rot90_f16(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v4f16" + )] + fn _vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 
0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcadd_rot90_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1147,8 +1292,33 @@ pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vcaddq_rot90_f16(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v8f16" + )] + fn _vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcaddq_rot90_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue 
= "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1163,8 +1333,31 @@ pub fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { _vcadd_rot90_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32" + )] + fn _vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vcadd_rot90_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1179,8 +1372,31 @@ pub fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vcaddq_rot90_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32" + )] + fn _vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcaddq_rot90_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1194,6 +1410,28 @@ pub fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } unsafe { _vcaddq_rot90_f64(a, b) } } +#[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64" + )] + fn _vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vcaddq_rot90_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point absolute compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f64)"] #[inline] @@ -2905,6 +3143,7 @@ pub fn vcltzh_f16(a: 
f16) -> u16 { #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -2921,58 +3160,158 @@ pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t unsafe { _vcmla_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fcmla))] -pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { +pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16" + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f16" )] - fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + fn _vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vcmlaq_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fcmla))] -pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { +pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32" + link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16" )] - fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; } - unsafe { _vcmla_f32(a, b, c) } + unsafe { _vcmlaq_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fcmla))] -pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { +pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32" + link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16" )] - fn 
_vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32" + )] + fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { _vcmla_f32(a, b, c) } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32" + )] + fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, 
b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float32x2_t = _vcmla_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32" + )] + fn _vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; } unsafe { _vcmlaq_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32" + )] + fn _vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -2987,6 +3326,29 @@ pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t unsafe { _vcmlaq_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v2f64" + )] + fn _vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -3055,6 +3417,7 @@ pub fn vcmlaq_lane_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_lane_f32( a: float32x2_t, b: float32x2_t, @@ -3073,6 +3436,7 @@ pub fn vcmla_lane_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] 
#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_lane_f32( a: float32x4_t, b: float32x4_t, @@ -3094,6 +3458,54 @@ pub fn vcmlaq_lane_f32( } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmla_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert!(LANE == 0); + unsafe { + let c: float32x2_t = + simd_shuffle!(c, c, [2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmlaq_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert!(LANE == 0); + unsafe { + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -3162,6 +3574,7 @@ pub fn vcmlaq_laneq_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = 
"stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_laneq_f32( a: float32x2_t, b: float32x2_t, @@ -3180,6 +3593,7 @@ pub fn vcmla_laneq_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_laneq_f32( a: float32x4_t, b: float32x4_t, @@ -3201,8 +3615,57 @@ pub fn vcmlaq_laneq_f32( } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmla_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x2_t = + simd_shuffle!(c, c, [2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmlaq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3219,8 +3682,34 @@ pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float unsafe { _vcmla_rot180_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v4f16" + )] + fn _vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_rot180_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3237,58 +3726,155 @@ pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> floa unsafe { 
_vcmlaq_rot180_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fcmla))] -pub fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { +pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32" + link_name = "llvm.aarch64.neon.vcmla.rot180.v8f16" )] - fn _vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + fn _vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_rot180_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vcmla_rot180_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, 
assert_instr(fcmla))] -pub fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { +pub fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32" + link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32" )] - fn _vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + fn _vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; } - unsafe { _vcmlaq_rot180_f32(a, b, c) } + unsafe { _vcmla_rot180_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] -pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { +pub fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64" + link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32" )] - fn _vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + fn _vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float32x2_t = _vcmla_rot180_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { 
_vcmlaq_rot180_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32" + )] + fn _vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { _vcmlaq_rot180_f32(a, b, c) } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32" + )] + fn _vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_rot180_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply 
accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64" + )] + fn _vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { _vcmlaq_rot180_f64(a, b, c) } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64" + )] + fn _vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_rot180_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f16)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] 
#[rustc_legacy_const_generics(3)] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3353,6 +3939,7 @@ pub fn vcmlaq_rot180_lane_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_rot180_lane_f32( a: float32x2_t, b: float32x2_t, @@ -3371,6 +3958,7 @@ pub fn vcmla_rot180_lane_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_rot180_lane_f32( a: float32x4_t, b: float32x4_t, @@ -3392,6 +3980,54 @@ pub fn vcmlaq_rot180_lane_f32( } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmla_rot180_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert!(LANE == 0); + unsafe { + let c: float32x2_t = + simd_shuffle!(c, c, [2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_rot180_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmlaq_rot180_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + 
static_assert!(LANE == 0); + unsafe { + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_rot180_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -3460,6 +4096,7 @@ pub fn vcmlaq_rot180_laneq_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_rot180_laneq_f32( a: float32x2_t, b: float32x2_t, @@ -3478,6 +4115,7 @@ pub fn vcmla_rot180_laneq_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_rot180_laneq_f32( a: float32x4_t, b: float32x4_t, @@ -3499,8 +4137,57 @@ pub fn vcmlaq_rot180_laneq_f32( } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmla_rot180_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x2_t = + simd_shuffle!(c, c, [2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_rot180_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f32)"] 
+#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmlaq_rot180_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_rot180_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3517,8 +4204,34 @@ pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float unsafe { _vcmla_rot270_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v4f16" + )] + fn _vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + 
let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_rot270_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3535,8 +4248,34 @@ pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> floa unsafe { _vcmlaq_rot270_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v8f16" + )] + fn _vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_rot270_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"] #[inline] 
+#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3551,8 +4290,32 @@ pub fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float unsafe { _vcmla_rot270_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32" + )] + fn _vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float32x2_t = _vcmla_rot270_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3567,8 +4330,32 @@ pub fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> floa unsafe { _vcmlaq_rot270_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"] +#[inline] 
+#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32" + )] + fn _vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_rot270_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3583,11 +4370,34 @@ pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa unsafe { _vcmlaq_rot270_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v2f64" + )] + fn _vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_rot270_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f16)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg(not(target_arch = "arm64ec"))] @@ -3651,6 +4461,7 @@ pub fn vcmlaq_rot270_lane_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_rot270_lane_f32( a: float32x2_t, b: float32x2_t, @@ -3669,6 +4480,7 @@ pub fn vcmla_rot270_lane_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_rot270_lane_f32( a: float32x4_t, b: float32x4_t, @@ -3690,6 +4502,54 @@ pub fn vcmlaq_rot270_lane_f32( } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] 
+#[cfg(target_endian = "big")] +pub fn vcmla_rot270_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert!(LANE == 0); + unsafe { + let c: float32x2_t = + simd_shuffle!(c, c, [2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_rot270_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmlaq_rot270_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert!(LANE == 0); + unsafe { + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_rot270_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -3758,6 +4618,7 @@ pub fn vcmlaq_rot270_laneq_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_rot270_laneq_f32( a: float32x2_t, b: float32x2_t, @@ -3776,6 +4637,7 @@ pub fn vcmla_rot270_laneq_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_rot270_laneq_f32( a: float32x4_t, b: float32x4_t, @@ -3797,8 +4659,57 @@ pub fn vcmlaq_rot270_laneq_f32( } } #[doc = "Floating-point complex multiply 
accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmla_rot270_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x2_t = + simd_shuffle!(c, c, [2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_rot270_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmlaq_rot270_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_rot270_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3815,8 +4726,34 @@ pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float1 unsafe { _vcmla_rot90_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v4f16" + )] + fn _vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_rot90_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3833,8 +4770,34 @@ pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float unsafe { _vcmlaq_rot90_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, 
c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v8f16" + )] + fn _vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_rot90_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3849,8 +4812,32 @@ pub fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3 unsafe { _vcmla_rot90_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32" + )] + fn _vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let 
ret_val: float32x2_t = _vcmla_rot90_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3865,8 +4852,32 @@ pub fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float unsafe { _vcmlaq_rot90_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v4f32" + )] + fn _vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_rot90_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3881,6 +4892,29 @@ pub fn 
vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float unsafe { _vcmlaq_rot90_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64" + )] + fn _vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_rot90_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -3949,6 +4983,7 @@ pub fn vcmlaq_rot90_lane_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_rot90_lane_f32( a: float32x2_t, b: float32x2_t, @@ -3967,6 +5002,7 @@ pub fn vcmla_rot90_lane_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_rot90_lane_f32( a: float32x4_t, b: float32x4_t, @@ -3988,46 +5024,93 @@ pub fn vcmlaq_rot90_lane_f32( } } #[doc = "Floating-point complex multiply 
accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32)"] #[inline] #[target_feature(enable = "neon,fcma")] #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] -#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vcmla_rot90_laneq_f16( - a: float16x4_t, - b: float16x4_t, - c: float16x8_t, -) -> float16x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg(target_endian = "big")] +pub fn vcmla_rot90_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert!(LANE == 0); unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_rot90_f16(a, b, c) + let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_rot90_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f32)"] #[inline] #[target_feature(enable = "neon,fcma")] #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] -#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vcmlaq_rot90_laneq_f16( - a: float16x8_t, - b: float16x8_t, +#[cfg(target_endian = "big")] +pub fn vcmlaq_rot90_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert!(LANE == 0); + unsafe { + let c: float32x4_t = 
simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_rot90_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f16)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vcmla_rot90_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot90_f16(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f16)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vcmlaq_rot90_laneq_f16( + a: float16x8_t, + b: float16x8_t, c: float16x8_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); @@ -4056,6 +5139,7 @@ pub fn vcmlaq_rot90_laneq_f16( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmla_rot90_laneq_f32( a: float32x2_t, b: float32x2_t, @@ -4074,6 +5158,7 @@ pub fn vcmla_rot90_laneq_f32( #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] 
#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "little")] pub fn vcmlaq_rot90_laneq_f32( a: float32x4_t, b: float32x4_t, @@ -4094,15 +5179,77 @@ pub fn vcmlaq_rot90_laneq_f32( vcmlaq_rot90_f32(a, b, c) } } +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmla_rot90_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x2_t = + simd_shuffle!(c, c, [2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]); + vcmla_rot90_f32(a, b, c) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(target_endian = "big")] +pub fn vcmlaq_rot90_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1, + 2 * (1 - LANE) as u32, + 2 * (1 - LANE) as u32 + 1 + ] + ); + vcmlaq_rot90_f32(a, b, c) + } +} #[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(mov))] pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t { unsafe { simd_shuffle!(a, b, [0, 1]) } } +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(mov))] +pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t { + unsafe { + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)"] #[inline] @@ -4604,6 +5751,7 @@ pub fn vcopyq_lane_u64( #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr( all(test, target_endian = "little"), @@ -4621,6 +5769,31 @@ pub fn vcopyq_lane_p64( unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) } } #[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 1, LANE2 = 0) +)] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopyq_lane_p64( + a: poly64x2_t, + b: poly64x1_t, +) -> poly64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert!(LANE2 == 0); + 
unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = vcombine_p64(b, b); + let ret_val: poly64x2_t = + simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"] #[inline] #[target_feature(enable = "neon")] @@ -4988,6 +6161,7 @@ pub fn vcopyq_laneq_p16( #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr( all(test, target_endian = "little"), @@ -5004,6 +6178,31 @@ pub fn vcopyq_laneq_p64( unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) } } #[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopyq_laneq_p64( + a: poly64x2_t, + b: poly64x2_t, +) -> poly64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 1); + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = + simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"] 
#[inline] #[target_feature(enable = "neon")] @@ -5516,7 +6715,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtah_s16_f16(a: f16) -> i16 { - vcvtah_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.i16.f16" + )] + fn _vcvtah_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtah_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"] @@ -5560,7 +6766,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtah_u16_f16(a: f16) -> u16 { - vcvtah_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.i16.f16" + )] + fn _vcvtah_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtah_u16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"] @@ -6202,7 +7415,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtmh_s16_f16(a: f16) -> i16 { - vcvtmh_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.i16.f16" + )] + fn _vcvtmh_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtmh_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding towards minus infinity"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"] @@ -6238,7 +7458,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 { } unsafe { _vcvtmh_s64_f16(a) } } -#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"] #[inline] #[cfg_attr(test, assert_instr(fcvtmu))] @@ -6246,7 +7466,14 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtmh_u16_f16(a: f16) -> u16 { - vcvtmh_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.i16.f16" + )] + fn _vcvtmh_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtmh_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"] @@ -6550,7 +7777,14 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtnh_s16_f16(a: f16) -> i16 { - vcvtnh_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i16.f16" + )] + fn _vcvtnh_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtnh_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"] @@ -6594,7 +7828,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] 
#[cfg(not(target_arch = "arm64ec"))] pub fn vcvtnh_u16_f16(a: f16) -> u16 { - vcvtnh_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i16.f16" + )] + fn _vcvtnh_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtnh_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"] @@ -6898,7 +8139,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtph_s16_f16(a: f16) -> i16 { - vcvtph_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i16.f16" + )] + fn _vcvtph_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtph_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"] @@ -6942,7 +8190,14 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtph_u16_f16(a: f16) -> u16 { - vcvtph_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i16.f16" + )] + fn _vcvtph_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtph_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"] @@ -7543,6 +8798,7 @@ pub fn vduph_laneq_f16(a: float16x8_t) -> f16 { #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 0))] #[rustc_legacy_const_generics(1)] @@ -7552,8 +8808,24 @@ pub fn vdupq_lane_f64(a: float64x1_t) -> float64x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdupq_lane_f64(a: float64x1_t) -> float64x2_t { + static_assert!(N == 0); + unsafe { + let ret_val: float64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 0))] #[rustc_legacy_const_generics(1)] @@ -7563,8 +8835,24 @@ pub fn vdupq_lane_p64(a: poly64x1_t) -> poly64x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdupq_lane_p64(a: poly64x1_t) -> poly64x2_t { + static_assert!(N == 0); + unsafe { + let ret_val: poly64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + 
simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 1))] #[rustc_legacy_const_generics(1)] @@ -7574,29 +8862,62 @@ pub fn vdupq_laneq_f64(a: float64x2_t) -> float64x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { +pub fn vdupq_laneq_f64(a: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] +#[cfg_attr(test, assert_instr(dup, N = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vdups_lane_f32(a: float32x2_t) -> f32 { +pub fn 
vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { static_assert_uimm_bits!(N, 1); - vget_lane_f32::(a) + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdups_lane_f32(a: float32x2_t) -> f32 { + static_assert_uimm_bits!(N, 1); + vget_lane_f32::(a) +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(nop, N = 1))] @@ -7847,6 +9168,7 @@ pub fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { #[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(test, assert_instr(ext, N = 1))] #[rustc_legacy_const_generics(2)] @@ -7856,8 +9178,26 @@ pub fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } #[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ext, N = 1))] #[rustc_legacy_const_generics(2)] @@ -7866,6 +9206,23 @@ pub fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { static_assert_uimm_bits!(N, 1); unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = 
simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64)"] #[inline] @@ -8193,6 +9550,7 @@ pub fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64 { #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8209,8 +9567,34 @@ pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3 unsafe { _vfmlal_high_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal2))] +pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v2f32.v4f16" + )] + fn _vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: 
float32x2_t = _vfmlal_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8226,6 +9610,31 @@ pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float } unsafe { _vfmlalq_high_f16(r, a, b) } } +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal2))] +pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v4f32.v8f16" + )] + fn _vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlalq_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_high_f16)"] 
#[inline] @@ -8365,6 +9774,7 @@ pub fn vfmlalq_laneq_low_f16( #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8381,8 +9791,34 @@ pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32 unsafe { _vfmlal_low_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal))] +pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v2f32.v4f16" + )] + fn _vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x2_t = _vfmlal_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] 
#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8398,9 +9834,35 @@ pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3 } unsafe { _vfmlalq_low_f16(r, a, b) } } +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal))] +pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v4f32.v8f16" + )] + fn _vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlalq_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8417,8 +9879,34 @@ pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3 unsafe { _vfmlsl_high_f16(r, a, b) } } #[doc = "Floating-point 
fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v2f32.v4f16" + )] + fn _vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x2_t = _vfmlsl_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8434,6 +9922,31 @@ pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float } unsafe { _vfmlslq_high_f16(r, a, b) } } +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable 
= "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v4f32.v8f16" + )] + fn _vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlslq_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_high_f16)"] #[inline] @@ -8573,6 +10086,7 @@ pub fn vfmlslq_laneq_low_f16( #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8589,8 +10103,34 @@ pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32 unsafe { _vfmlsl_low_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] 
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl))] +pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v2f32.v4f16" + )] + fn _vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x2_t = _vfmlsl_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -8606,6 +10146,31 @@ pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3 } unsafe { _vfmlslq_low_f16(r, a, b) } } +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl))] +pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v4f32.v8f16" + )] + fn _vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlslq_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"] #[inline] @@ -8931,24 +10496,53 @@ pub fn vfmsd_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64 { #[doc = "Duplicate vector element to vector or scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fmov))] +#[cfg_attr(test, assert_instr(nop))] pub fn vget_high_f64(a: float64x2_t) -> float64x1_t { unsafe { float64x1_t([simd_extract!(a, 1)]) } } #[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vget_low_f64(a: float64x2_t) -> float64x1_t { - unsafe { float64x1_t([simd_extract!(a, 0)]) } +pub fn vget_high_f64(a: float64x2_t) -> float64x1_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + float64x1_t([simd_extract!(a, 1)]) + } } #[doc = "Duplicate vector element 
to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f64(a: float64x2_t) -> float64x1_t { + unsafe { float64x1_t([simd_extract!(a, 0)]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f64(a: float64x2_t) -> float64x1_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + float64x1_t([simd_extract!(a, 0)]) + } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"] +#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[rustc_legacy_const_generics(1)] @@ -8957,6 +10551,21 @@ pub fn vgetq_lane_f64(a: float64x2_t) -> f64 { static_assert_uimm_bits!(IMM5, 1); unsafe { simd_extract!(a, IMM5 as u32) } } +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +pub fn vgetq_lane_f64(a: float64x2_t) -> f64 { + 
static_assert_uimm_bits!(IMM5, 1); + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + simd_extract!(a, IMM5 as u32) + } +} #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] #[doc = "## Safety"] @@ -9469,7 +11078,6 @@ pub unsafe fn vld2_lane_u64(a: *const u64, b: uint64x1x2_t) -> #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld2r))] @@ -9477,47 +11085,16 @@ pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t { transmute(vld2q_dup_s64(transmute(a))) } #[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t { - let mut ret_val: poly64x2x2_t = transmute(vld2q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val -} -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld2r))] pub unsafe fn vld2q_dup_u64(a: *const u64) -> 
uint64x2x2_t { transmute(vld2q_dup_s64(transmute(a))) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t { - let mut ret_val: uint64x2x2_t = transmute(vld2q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val -} #[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64)"] #[doc = "## Safety"] @@ -9672,7 +11249,6 @@ pub unsafe fn vld2q_lane_p8(a: *const p8, b: poly8x16x2_t) -> p #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld2))] @@ -9680,21 +11256,6 @@ pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t { transmute(vld2q_s64(transmute(a))) } #[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t { - let mut ret_val: poly64x2x2_t = transmute(vld2q_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - 
ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val -} -#[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] @@ -9853,7 +11414,6 @@ pub unsafe fn vld3_lane_u64(a: *const u64, b: uint64x1x3_t) -> #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3r))] @@ -9861,49 +11421,16 @@ pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t { transmute(vld3q_dup_s64(transmute(a))) } #[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t { - let mut ret_val: poly64x2x3_t = transmute(vld3q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3r))] pub unsafe fn vld3q_dup_u64(a: *const 
u64) -> uint64x2x3_t { transmute(vld3q_dup_s64(transmute(a))) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t { - let mut ret_val: uint64x2x3_t = transmute(vld3q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val -} #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64)"] #[doc = "## Safety"] @@ -10061,7 +11588,6 @@ pub unsafe fn vld3q_lane_p8(a: *const p8, b: poly8x16x3_t) -> p #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3))] @@ -10069,22 +11595,6 @@ pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t { transmute(vld3q_s64(transmute(a))) } #[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t { - let mut ret_val: poly64x2x3_t = 
transmute(vld3q_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val -} -#[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] @@ -10245,7 +11755,6 @@ pub unsafe fn vld4_lane_u64(a: *const u64, b: uint64x1x4_t) -> #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -10253,51 +11762,16 @@ pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t { transmute(vld4q_dup_s64(transmute(a))) } #[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t { - let mut ret_val: poly64x2x4_t = transmute(vld4q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val -} -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic 
unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t { transmute(vld4q_dup_s64(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t { - let mut ret_val: uint64x2x4_t = transmute(vld4q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val -} #[doc = "Load multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"] #[doc = "## Safety"] @@ -10458,7 +11932,6 @@ pub unsafe fn vld4q_lane_p8(a: *const p8, b: poly8x16x4_t) -> p #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[target_feature(enable = "neon,aes")] #[cfg_attr(test, assert_instr(ld4))] @@ -10466,23 +11939,6 @@ pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t { transmute(vld4q_s64(transmute(a))) } #[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] 
-#[cfg(target_endian = "big")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t { - let mut ret_val: poly64x2x4_t = transmute(vld4q_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val -} -#[doc = "Load multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] @@ -13435,6 +14891,7 @@ pub fn vpaddd_u64(a: uint64x2_t) -> u64 { #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13447,8 +14904,27 @@ pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(faddp))] +pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, 
crate::core_arch::macros::odd::<8>()); + let ret_val: float16x8_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(faddp))] @@ -13460,103 +14936,129 @@ pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { } } #[doc = "Floating-point add pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(faddp))] -pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { - let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); - let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); - simd_add(even, odd) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + let ret_val: float32x4_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"] #[inline] 
+#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(test, assert_instr(faddp))] +pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { - let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); - let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); simd_add(even, odd) } } -#[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(test, assert_instr(faddp))] +pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { - let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); - let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); - simd_add(even, odd) + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + let ret_val: float64x2_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { - let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); - let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); simd_add(even, odd) } } #[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { - let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); - let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); - simd_add(even, odd) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + let ret_val: int8x16_t = simd_add(even, odd); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } #[doc = "Add Pairwise"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { - let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); - let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); simd_add(even, odd) } } #[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { +pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); - simd_add(even, odd) + let ret_val: int16x8_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } #[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { +pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); @@ -13564,21 +15066,187 @@ pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { } } #[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { +pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + let ret_val: int32x4_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let even = simd_shuffle!(a, b, 
crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + simd_add(even, odd) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + let ret_val: int64x2_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + simd_add(even, odd) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 
7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + let ret_val: uint8x16_t = simd_add(even, odd); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + simd_add(even, odd) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + let ret_val: uint16x8_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] 
+pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + simd_add(even, odd) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + let ret_val: uint32x4_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); simd_add(even, odd) } } +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + 
let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + let ret_val: uint64x2_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13594,8 +15262,32 @@ pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpmax_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v4f16" + )] + fn _vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpmax_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13611,8 +15303,32 @@ pub fn 
vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vpmaxq_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v8f16" + )] + fn _vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpmaxq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13628,8 +15344,32 @@ pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpmaxnm_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f16" + )] + fn _vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpmaxnm_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13644,9 +15384,33 @@ pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { } unsafe { _vpmaxnmq_f16(a, b) } } +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v8f16" + )] + fn _vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpmaxnmq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} #[doc = "Floating-point Maximum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"] #[inline] 
+#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -13661,24 +15425,70 @@ pub fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { _vpmaxnm_f32(a, b) } } #[doc = "Floating-point Maximum Number Pairwise (vector)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +pub fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnmp.v4f32" + link_name = "llvm.aarch64.neon.fmaxnmp.v2f32" )] - fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + fn _vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpmaxnm_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vpmaxnmq_f32(a, b) } } #[doc = "Floating-point Maximum Number Pairwise (vector)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> 
float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f32" + )] + fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vpmaxnmq_f32(a, b) } +} +#[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f32" + )] + fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vpmaxnmq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"] +#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -13692,9 +15502,32 @@ pub fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } unsafe { _vpmaxnmq_f64(a, b) } } +#[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] 
+pub fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v2f64" + )] + fn _vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpmaxnmq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point maximum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -13709,8 +15542,29 @@ pub fn vpmaxnmqd_f64(a: float64x2_t) -> f64 { unsafe { _vpmaxnmqd_f64(a) } } #[doc = "Floating-point maximum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnmqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64" + )] + fn _vpmaxnmqd_f64(a: float64x2_t) -> f64; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + _vpmaxnmqd_f64(a) + } +} +#[doc = "Floating-point maximum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -13724,9 
+15578,30 @@ pub fn vpmaxnms_f32(a: float32x2_t) -> f32 { } unsafe { _vpmaxnms_f32(a) } } +#[doc = "Floating-point maximum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnms_f32(a: float32x2_t) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32" + )] + fn _vpmaxnms_f32(a: float32x2_t) -> f32; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + _vpmaxnms_f32(a) + } +} #[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fmaxp))] @@ -13741,8 +15616,31 @@ pub fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vpmaxq_f32(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v4f32" + )] + fn _vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: 
float32x4_t = _vpmaxq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fmaxp))] @@ -13757,8 +15655,31 @@ pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { _vpmaxq_f64(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v2f64" + )] + fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpmaxq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(smaxp))] @@ -13773,8 +15694,37 @@ pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { _vpmaxq_s8(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"] +#[inline] 
+#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v16i8" + )] + fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = _vpmaxq_s8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(smaxp))] @@ -13789,8 +15739,31 @@ pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { _vpmaxq_s16(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v8i16" + )] + fn _vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 
4, 3, 2, 1, 0]); + let ret_val: int16x8_t = _vpmaxq_s16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(smaxp))] @@ -13805,8 +15778,31 @@ pub fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { _vpmaxq_s32(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v4i32" + )] + fn _vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = _vpmaxq_s32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(umaxp))] @@ -13821,8 +15817,37 @@ pub fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { _vpmaxq_u8(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(umaxp))] +pub fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v16i8" + )] + fn _vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = _vpmaxq_u8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(umaxp))] @@ -13837,8 +15862,31 @@ pub fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { _vpmaxq_u16(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(umaxp))] +pub fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v8i16" + )] + fn _vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + 
unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = _vpmaxq_u16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(umaxp))] @@ -13852,6 +15900,28 @@ pub fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { } unsafe { _vpmaxq_u32(a, b) } } +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(umaxp))] +pub fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v4i32" + )] + fn _vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vpmaxq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point maximum pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"] #[inline] @@ -13887,6 +15957,7 @@ pub fn vpmaxs_f32(a: float32x2_t) -> f32 { #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13902,8 +15973,32 @@ pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpmin_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v4f16" + )] + fn _vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpmin_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13919,8 +16014,32 @@ pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vpminq_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminq_f16(a: 
float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v8f16" + )] + fn _vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpminq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13936,8 +16055,32 @@ pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpminnm_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fminnmp))] +pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v4f16" + )] + fn _vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpminnm_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -13952,9 +16095,33 @@ pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { } unsafe { _vpminnmq_f16(a, b) } } +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fminnmp))] +pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v8f16" + )] + fn _vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpminnmq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} #[doc = "Floating-point Minimum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -13969,8 +16136,31 @@ pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { _vpminnm_f32(a, b) } } #[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v2f32" + )] + fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpminnm_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point Minimum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -13985,8 +16175,31 @@ pub fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vpminnmq_f32(a, b) } } #[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v4f32" + )] + fn _vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: 
float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vpminnmq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point Minimum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -14000,9 +16213,32 @@ pub fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } unsafe { _vpminnmq_f64(a, b) } } +#[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v2f64" + )] + fn _vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpminnmq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point minimum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -14017,8 +16253,29 @@ pub fn vpminnmqd_f64(a: float64x2_t) -> f64 { unsafe { _vpminnmqd_f64(a) } } #[doc = "Floating-point minimum number pairwise"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnmqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64" + )] + fn _vpminnmqd_f64(a: float64x2_t) -> f64; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + _vpminnmqd_f64(a) + } +} +#[doc = "Floating-point minimum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -14032,9 +16289,30 @@ pub fn vpminnms_f32(a: float32x2_t) -> f32 { } unsafe { _vpminnms_f32(a) } } +#[doc = "Floating-point minimum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnms_f32(a: float32x2_t) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32" + )] + fn _vpminnms_f32(a: float32x2_t) -> f32; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + _vpminnms_f32(a) + } +} #[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = 
"neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fminp))] @@ -14049,8 +16327,31 @@ pub fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vpminq_f32(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v4f32" + )] + fn _vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vpminq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fminp))] @@ -14065,40 +16366,115 @@ pub fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { _vpminq_f64(a, b) } } #[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sminp))] -pub fn 
vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v16i8" + link_name = "llvm.aarch64.neon.fminp.v2f64" )] - fn _vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + fn _vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpminq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vpminq_s8(a, b) } } #[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sminp))] -pub fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +pub fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v8i16" + link_name = "llvm.aarch64.neon.sminp.v16i8" + )] + fn _vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vpminq_s8(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sminp))] +pub fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v16i8" + )] + fn _vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = _vpminq_s8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sminp))] +pub fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v8i16" )] fn _vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } unsafe { _vpminq_s16(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sminp))] +pub fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v8i16" + )] + fn _vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = 
_vpminq_s16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sminp))] @@ -14113,8 +16489,31 @@ pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { _vpminq_s32(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sminp))] +pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v4i32" + )] + fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = _vpminq_s32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(uminp))] @@ -14129,8 +16528,37 @@ pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { _vpminq_u8(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"] +#[inline] 
+#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v16i8" + )] + fn _vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = _vpminq_u8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(uminp))] @@ -14145,8 +16573,31 @@ pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { _vpminq_u16(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i16" + )] + fn _vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = 
simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = _vpminq_u16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(uminp))] @@ -14160,9 +16611,32 @@ pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { } unsafe { _vpminq_u32(a, b) } } +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i32" + )] + fn _vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vpminq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point minimum pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fminp))] @@ -14177,8 +16651,29 @@ pub fn vpminqd_f64(a: float64x2_t) -> f64 { unsafe { _vpminqd_f64(a) } } #[doc = "Floating-point minimum pairwise"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f64.v2f64" + )] + fn _vpminqd_f64(a: float64x2_t) -> f64; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + _vpminqd_f64(a) + } +} +#[doc = "Floating-point minimum pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fminp))] @@ -14192,6 +16687,26 @@ pub fn vpmins_f32(a: float32x2_t) -> f32 { } unsafe { _vpmins_f32(a) } } +#[doc = "Floating-point minimum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpmins_f32(a: float32x2_t) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f32.v2f32" + )] + fn _vpmins_f32(a: float32x2_t) -> f32; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + _vpmins_f32(a) + } +} #[doc = "Signed saturating Absolute value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s64)"] #[inline] @@ -14488,7 +17003,7 @@ pub fn vqdmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlalh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { @@ -14499,7 +17014,7 @@ pub fn vqdmlalh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { @@ -14510,7 +17025,7 @@ pub fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { @@ -14521,7 +17036,7 @@ pub fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = 
"1.59.0")] pub fn vqdmlals_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { @@ -14532,7 +17047,7 @@ pub fn vqdmlals_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 { let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c)); @@ -14654,7 +17169,7 @@ pub fn vqdmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlslh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { @@ -14665,7 +17180,7 @@ pub fn vqdmlslh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlslh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { @@ -14676,7 +17191,7 @@ pub fn vqdmlslh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 
0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsls_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { @@ -14687,7 +17202,7 @@ pub fn vqdmlsls_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsls_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { @@ -14698,7 +17213,7 @@ pub fn vqdmlsls_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 { let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c)); @@ -16679,6 +19194,7 @@ fn vqtbl1q(a: int8x16_t, b: uint8x16_t) -> int8x16_t { #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16686,8 +19202,25 @@ pub fn vqtbl1_s8(a: int8x16_t, b: uint8x8_t) -> int8x8_t { vqtbl1(a, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_s8)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1_s8(a: int8x16_t, b: uint8x8_t) -> int8x8_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl1(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16695,53 +19228,152 @@ pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { vqtbl1q(a, b) } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vqtbl1(transmute(a), b)) } +pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl1q(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { transmute(vqtbl1q(transmute(a), b)) } +pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vqtbl1(transmute(a), b)) } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vqtbl1(transmute(a), b)) } +pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbl1(transmute(a), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { +pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { 
transmute(vqtbl1q(transmute(a), b)) } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -fn vqtbl2(a: int8x16_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.tbl2.v8i8" - )] +pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbl1q(transmute(a), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vqtbl1(transmute(a), b)) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbl1(transmute(a), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { + unsafe { transmute(vqtbl1q(transmute(a), b)) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbl1q(transmute(a), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +fn vqtbl2(a: int8x16_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.tbl2.v8i8" + )] fn _vqtbl2(a: int8x16_t, b: int8x16_t, c: uint8x8_t) -> 
int8x8_t; } unsafe { _vqtbl2(a, b, c) } @@ -16765,6 +19397,7 @@ fn vqtbl2q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16772,8 +19405,34 @@ pub fn vqtbl2_s8(a: int8x16x2_t, b: uint8x8_t) -> int8x8_t { vqtbl2(a.0, a.1, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2_s8(a: int8x16x2_t, b: uint8x8_t) -> int8x8_t { + let mut a: int8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl2(a.0, a.1, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16781,8 +19440,39 @@ pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t { vqtbl2q(a.0, a.1, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t { + let mut a: int8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl2q(a.0, a.1, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16790,8 +19480,34 @@ pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16799,8 +19515,39 @@ pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t { unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t { + let mut a: uint8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16808,8 +19555,34 @@ pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t { unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16817,6 +19590,36 @@ pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t { unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t { + let mut a: poly8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 
7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3)"] #[inline] #[target_feature(enable = "neon")] @@ -16851,6 +19654,7 @@ fn vqtbl3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16858,58 +19662,262 @@ pub fn vqtbl3_s8(a: int8x16x3_t, b: uint8x8_t) -> int8x8_t { vqtbl3(a.0, a.1, a.2, b) } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t { - vqtbl3q(a.0, a.1, a.2, b) +pub fn vqtbl3_s8(a: int8x16x3_t, b: uint8x8_t) -> int8x8_t { + let mut a: int8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl3(a.0, a.1, a.2, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t { + vqtbl3q(a.0, a.1, a.2, b) } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t { - unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t { + let mut a: int8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl3q(a.0, a.1, a.2, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t { +pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t { - unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = + transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -fn vqtbl4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { +pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t { + unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t { + let mut a: uint8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = + transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t { + unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t { + let mut a: poly8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + 
a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +fn vqtbl4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.tbl4.v8i8" @@ -16949,6 +19957,7 @@ fn vqtbl4q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16956,8 +19965,44 @@ pub fn vqtbl4_s8(a: int8x16x4_t, b: uint8x8_t) -> int8x8_t { vqtbl4(a.0, a.1, a.2, a.3, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4_s8(a: int8x16x4_t, b: uint8x8_t) -> int8x8_t { + let mut a: int8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + 
a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl4(a.0, a.1, a.2, a.3, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16965,8 +20010,49 @@ pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t { vqtbl4q(a.0, a.1, a.2, a.3, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t { + let mut a: int8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl4q(a.0, a.1, a.2, a.3, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] 
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16982,8 +20068,50 @@ pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t { } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16999,8 +20127,55 @@ pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t { } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"] 
+#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t { + let mut a: uint8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbl4q( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17016,8 +20191,50 @@ pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t { } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17032,6 +20249,52 @@ pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t { )) } } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t { + let mut a: poly8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbl4q( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )); + simd_shuffle!( + ret_val, + ret_val, 
+ [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1)"] #[inline] @@ -17067,6 +20330,7 @@ fn vqtbx1q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17074,17 +20338,59 @@ pub fn vqtbx1_s8(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { vqtbx1(a, b, c) } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { +pub fn vqtbx1_s8(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx1(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since 
= "1.59.0")] +pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { vqtbx1q(a, b, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx1q(a, b, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17092,8 +20398,26 @@ pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 
9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx1(transmute(a), transmute(b), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17101,8 +20425,32 @@ pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbx1q(transmute(a), transmute(b), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17110,8 +20458,26 @@ pub fn vqtbx1_p8(a: 
poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t { unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx1(transmute(a), transmute(b), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17119,6 +20485,29 @@ pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t { unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 
2, 1, 0]); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbx1q(transmute(a), transmute(b), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2)"] #[inline] #[target_feature(enable = "neon")] @@ -17153,6 +20542,7 @@ fn vqtbx2q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17160,8 +20550,35 @@ pub fn vqtbx2_s8(a: int8x8_t, b: int8x16x2_t, c: uint8x8_t) -> int8x8_t { vqtbx2(a, b.0, b.1, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2_s8(a: int8x8_t, b: int8x16x2_t, c: uint8x8_t) -> int8x8_t { + let mut b: int8x16x2_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx2(a, b.0, b.1, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17169,8 +20586,41 @@ pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t { vqtbx2q(a, b.0, b.1, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t { + let mut b: int8x16x2_t = b; + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx2q(a, b.0, b.1, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17178,8 +20628,35 @@ pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x16x2_t = b; + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17187,8 +20664,42 @@ pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t { unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t { + let mut b: uint8x16x2_t = b; + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17196,8 +20707,35 @@ pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t { unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x16x2_t = b; + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17205,6 +20743,39 @@ pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t { unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t { + let mut b: poly8x16x2_t = b; + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3)"] #[inline] #[target_feature(enable = "neon")] @@ -17246,6 +20817,7 @@ fn vqtbx3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_s8)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17253,34 +20825,142 @@ pub fn vqtbx3_s8(a: int8x8_t, b: int8x16x3_t, c: uint8x8_t) -> int8x8_t { vqtbx3(a, b.0, b.1, b.2, c) } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t { - vqtbx3q(a, b.0, b.1, b.2, c) +pub fn vqtbx3_s8(a: int8x8_t, b: int8x16x3_t, c: uint8x8_t) -> int8x8_t { + let mut b: int8x16x3_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx3(a, b.0, b.1, b.2, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t { - unsafe { - 
transmute(vqtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - c, - )) - } +pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t { + vqtbx3q(a, b.0, b.1, b.2, c) } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t { + let mut b: int8x16x3_t = b; + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx3q(a, b.0, b.1, b.2, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vqtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )) + } +} +#[doc = "Extended table look-up"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x16x3_t = b; + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"] +#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17296,8 +20976,52 @@ pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t { + let mut b: uint8x16x3_t = b; + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbx3q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17313,8 +21037,46 @@ pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x16x3_t = b; + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx3( + transmute(a), 
+ transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17330,6 +21092,49 @@ pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t { + let mut b: poly8x16x3_t = b; + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbx3q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4)"] #[inline] #[target_feature(enable = "neon")] @@ -17392,6 +21197,7 @@ fn vqtbx4q( #[doc = "Extended table 
look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17399,8 +21205,45 @@ pub fn vqtbx4_s8(a: int8x8_t, b: int8x16x4_t, c: uint8x8_t) -> int8x8_t { vqtbx4(a, b.0, b.1, b.2, b.3, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4_s8(a: int8x8_t, b: int8x16x4_t, c: uint8x8_t) -> int8x8_t { + let mut b: int8x16x4_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx4(a, b.0, b.1, b.2, b.3, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17408,8 +21251,51 @@ pub fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t { vqtbx4q(a, b.0, b.1, b.2, b.3, c) } #[doc = 
"Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t { + let mut b: int8x16x4_t = b; + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx4q(a, b.0, b.1, b.2, b.3, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -17426,32 +21312,58 @@ pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t { } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, 
assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t { +pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x16x4_t = b; unsafe { - transmute(vqtbx4q( + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx4( transmute(a), transmute(b.0), transmute(b.1), transmute(b.2), transmute(b.3), c, - )) + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t { +pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t { unsafe { - transmute(vqtbx4( + transmute(vqtbx4q( transmute(a), transmute(b.0), transmute(b.1), @@ -17462,32 +21374,193 @@ pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t { } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t { +pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t { + let mut b: uint8x16x4_t = b; unsafe { - transmute(vqtbx4q( + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbx4q( transmute(a), transmute(b.0), transmute(b.1), transmute(b.2), transmute(b.3), c, - )) + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Rotate and exclusive OR"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(rax1))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vqtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x16x4_t = b; + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t { + unsafe { + transmute(vqtbx4q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )) + } 
+} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t { + let mut b: poly8x16x4_t = b; + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbx4q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Rotate and exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"] +#[inline] +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(rax1))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.rax1" )] @@ -17516,7 +21589,6 @@ pub fn vrbitq_s8(a: int8x16_t) -> int8x16_t { #[doc = "Reverse bit order"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] @@ -17524,23 +21596,8 @@ pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t { unsafe { transmute(vrbit_s8(transmute(a))) } } #[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vrbit_s8(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Reverse bit order"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] @@ -17548,28 +21605,8 @@ pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t { unsafe { transmute(vrbitq_s8(transmute(a))) } } #[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(vrbitq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } -} -#[doc = "Reverse bit order"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] @@ -17577,48 +21614,14 @@ pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t { unsafe { transmute(vrbit_s8(transmute(a))) } } #[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vrbit_s8(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Reverse bit order"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t { unsafe { transmute(vrbitq_s8(transmute(a))) } } -#[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = 
transmute(vrbitq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } -} #[doc = "Reciprocal estimate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64)"] #[inline] @@ -17833,7 +21836,6 @@ pub fn vrecpxh_f16(a: f16) -> f16 { #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -17842,23 +21844,8 @@ pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } -} -#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -17867,426 +21854,334 @@ pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { +pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { - unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, 
ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { +pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { +pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { - unsafe { - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { +pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { +pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { +pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { +pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { +pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { - unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { +pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { - unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { +pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { - unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { +pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { + unsafe { transmute(a) } } #[doc = "Vector 
reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t { +pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { +pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { - unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { +pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { - unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { +pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { - unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t { +pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { +pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { - unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { +pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc 
= "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t { +pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -18294,7229 +22189,9626 @@ pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { +pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { +pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { +pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_f64(a: float64x2_t) 
-> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { +pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { +pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { +pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { +pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, 
assert_instr(nop))] -pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { +pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { +pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, 
assert_instr(nop))] -pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { +pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { +pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { +pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { +pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { +pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { unsafe { transmute(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32x_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v2f32" + )] + fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t; } + unsafe { _vrnd32x_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] -#[inline] 
-#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v4f32" + )] + fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t; } + unsafe { _vrnd32xq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s16(a: int16x8_t) 
-> float64x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v2f64" + )] + fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd32xq_f64(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint32x.f64" + )] + fn _vrnd32x_f64(a: f64) -> f64; } + unsafe { transmute(_vrnd32x_f64(vget_lane_f64::<0>(a))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[doc = "Floating-point round to 32-bit integer toward zero"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32z_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v2f32" + )] + fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrnd32z_f32(a) } +} +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v4f32" + )] + fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrnd32zq_f32(a) } +} +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v2f64" + )] + fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd32zq_f64(a) } +} +#[doc = "Floating-point round to 32-bit integer toward zero"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint32z.f64" + )] + fn _vrnd32z_f64(a: f64) -> f64; + } + unsafe { transmute(_vrnd32z_f64(vget_lane_f64::<0>(a))) } +} +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64x_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v2f32" + )] + fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrnd64x_f32(a) } +} +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v4f32" + )] + fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrnd64xq_f32(a) } +} +#[doc = "Floating-point 
round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v2f64" + )] + fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd64xq_f64(a) } +} +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint64x.f64" + )] + fn _vrnd64x_f64(a: f64) -> f64; + } + unsafe { transmute(_vrnd64x_f64(vget_lane_f64::<0>(a))) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64z_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v2f32" + )] + fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { 
_vrnd64z_f32(a) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v4f32" + )] + fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrnd64zq_f32(a) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v2f64" + )] + fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd64zq_f64(a) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint64z.f64" + )] + fn _vrnd64z_f64(a: f64) -> f64; + } + unsafe { 
transmute(_vrnd64z_f64(vget_lane_f64::<0>(a))) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrnd_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrnd_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrnd_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrnda_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrnda_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndaq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrnda_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndaq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndah_f16(a: f16) -> f16 { + roundf16(a) +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndh_f16(a: f16) -> 
f16 { + truncf16(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndi_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v4f16" + )] + fn _vrndi_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrndi_f16(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v8f16" + )] + fn _vrndiq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrndiq_f16(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndi_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v2f32" + )] + fn _vrndi_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrndi_f32(a) } +} +#[doc = "Floating-point round to integral, using 
current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndiq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v4f32" + )] + fn _vrndiq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrndiq_f32(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndi_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v1f64" + )] + fn _vrndi_f64(a: float64x1_t) -> float64x1_t; + } + unsafe { _vrndi_f64(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndiq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v2f64" + )] + fn _vrndiq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrndiq_f64(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"] +#[inline] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndih_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.f16" + )] + fn _vrndih_f16(a: f16) -> f16; + } + unsafe { _vrndih_f16(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndm_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndm_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndm_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmh_f16(a: f16) -> f16 { + floorf16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndn_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v1f64" + )] + fn _vrndn_f64(a: float64x1_t) -> float64x1_t; + } + unsafe { 
_vrndn_f64(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v2f64" + )] + fn _vrndnq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrndnq_f64(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndnh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.f16" + )] + fn _vrndnh_f16(a: f16) -> f16; + } + unsafe { _vrndnh_f16(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndns_f32(a: f32) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.f32" + )] + fn _vrndns_f32(a: f32) -> f32; + } + unsafe { _vrndns_f32(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndp_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndp_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndpq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndp_f64(a: float64x1_t) -> 
float64x1_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndph_f16(a: f16) -> f16 { + ceilf16(a) +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndx_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndx_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndx_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, 
assert_instr(frintx))] +pub fn vrndxh_f16(a: f16) -> f16 { + round_ties_even_f16(a) +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(srshl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshld_s64(a: i64, b: i64) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.i64" + )] + fn _vrshld_s64(a: i64, b: i64) -> i64; + } + unsafe { _vrshld_s64(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(urshl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshld_u64(a: u64, b: i64) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.i64" + )] + fn _vrshld_u64(a: u64, b: i64) -> u64; + } + unsafe { _vrshld_u64(a, b) } +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + vrshld_s64(a, -N as i64) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +pub fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 { + static_assert!(N >= 1 && N <= 64); + vrshld_u64(a, -N as i64) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_s8(a, vrshrn_n_s16::<N>(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_s16(a, vrshrn_n_s32::<N>(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_s32(a, vrshrn_n_s64::<N>(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 
2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_u8(a, vrshrn_n_u16::<N>(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_u16(a, vrshrn_n_u32::<N>(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_u32(a, vrshrn_n_u64::<N>(b)) +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrte_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v1f64" + )] + fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t; + } + unsafe { _vrsqrte_f64(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v2f64" + )] + fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrsqrteq_f64(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrted_f64(a: f64) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f64" + )] + fn _vrsqrted_f64(a: f64) -> f64; + } + unsafe { _vrsqrted_f64(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrtes_f32(a: f32) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f32" + )] + fn _vrsqrtes_f32(a: f32) -> f32; + } + unsafe { _vrsqrtes_f32(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"] +#[inline] +#[cfg_attr(test, assert_instr(frsqrte))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = 
"136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrteh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f16" + )] + fn _vrsqrteh_f16(a: f16) -> f16; + } + unsafe { _vrsqrteh_f16(a) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v1f64" + )] + fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; + } + unsafe { _vrsqrts_f64(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v2f64" + )] + fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { _vrsqrtsq_f64(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrtsd_f64(a: f64, b: f64) -> f64 { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f64" + )] + fn _vrsqrtsd_f64(a: f64, b: f64) -> f64; + } + unsafe { _vrsqrtsd_f64(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrtss_f32(a: f32, b: f32) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f32" + )] + fn _vrsqrtss_f32(a: f32, b: f32) -> f32; + } + unsafe { _vrsqrtss_f32(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrtsh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f16" + )] + fn _vrsqrtsh_f16(a: f16, b: f16) -> f16; + } + unsafe { _vrsqrtsh_f16(a, b) } +} +#[doc = "Signed rounding shift right and accumulate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsrad_n_s64(a: i64, b: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + let b: i64 = vrshrd_n_s64::(b); + a.wrapping_add(b) +} +#[doc = "Unsigned rounding shift 
right and accumulate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsrad_n_u64(a: u64, b: u64) -> u64 { + static_assert!(N >= 1 && N <= 64); + let b: u64 = vrshrd_n_u64::(b); + a.wrapping_add(b) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + vcombine_s8(a, vrsubhn_s16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + vcombine_s16(a, vrsubhn_s32(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + vcombine_s32(a, vrsubhn_s64(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + vcombine_u8(a, vrsubhn_u16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + vcombine_u16(a, vrsubhn_u32(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + vcombine_u32(a, vrsubhn_u64(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + vcombine_s8(a, vrsubhn_s16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + vcombine_s16(a, vrsubhn_s32(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + vcombine_s32(a, vrsubhn_s64(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + vcombine_u8(a, vrsubhn_u16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + vcombine_u16(a, vrsubhn_u32(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + vcombine_u32(a, vrsubhn_u64(b, c)) +} +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"] +#[inline] +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v4f16" + )] + fn _vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t; + } + unsafe { _vscale_f16(vn, vm) } +} +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f16)"] +#[inline] +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v8f16" + )] + fn _vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t; + } + unsafe { _vscaleq_f16(vn, vm) } +} +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f32)"] +#[inline] +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = 
"msvc")), assert_instr(fscale))] +pub fn vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v2f32" + )] + fn _vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t; + } + unsafe { _vscale_f32(vn, vm) } +} +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f32)"] +#[inline] +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v4f32" + )] + fn _vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t; + } + unsafe { _vscaleq_f32(vn, vm) } +} +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f64)"] +#[inline] +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v2f64" + )] + fn _vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t; + } + unsafe { _vscaleq_f64(vn, vm) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, 
LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vset_lane_f64(a: f64, b: float64x1_t) -> float64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "SHA512 hash update part 2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h2))] +#[stable(feature = 
"stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h2" + )] + fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512h2q_u64(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[doc = "SHA512 hash update part 2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h2))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h2" + )] + fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: uint64x2_t = _vsha512h2q_u64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[doc = "SHA512 hash update part 1"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h" + )] + fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512hq_u64(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[doc = "SHA512 hash update part 1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h" + )] + fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); + let a: 
uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: uint64x2_t = _vsha512hq_u64(a, b, c); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"] +#[doc = "SHA512 schedule update 0"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su0))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su0" + )] + fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512su0q_u64(a, b) } +} +#[doc = "SHA512 schedule update 0"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su0))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su0" + )] + fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = _vsha512su0q_u64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "SHA512 schedule update 1"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su1))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su1" + )] + fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512su1q_u64(a, b, c) } +} +#[doc = "SHA512 schedule update 1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su1))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su1" + )] + fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: uint64x2_t = _vsha512su1q_u64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { - unsafe { transmute(a) 
} +pub fn vshld_s64(a: i64, b: i64) -> i64 { + unsafe { transmute(vshl_s64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ushl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vshld_u64(a: u64, b: i64) -> u64 { + unsafe { transmute(vshl_u64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { + static_assert!(N >= 0 && N <= 8); + let b = vget_high_s8(a); + vshll_n_s8::(b) +} +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] +pub fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 16); + let b = vget_high_s16(a); + vshll_n_s16::(b) +} +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 32); + let b = vget_high_s32(a); + vshll_n_s32::(b) +} +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { + static_assert!(N >= 0 && N <= 8); + let b: uint8x8_t = vget_high_u8(a); + vshll_n_u8::(b) +} +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 16); + let b: uint16x4_t = vget_high_u16(a); + vshll_n_u16::(b) +} +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] 
+#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 32); + let b: uint32x2_t = vget_high_u32(a); + vshll_n_u32::(b) +} +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_s8(a, vshrn_n_s16::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_s16(a, vshrn_n_s32::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_s32(a, vshrn_n_s64::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_u8(a, vshrn_n_u16::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, 
target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_u16(a, vshrn_n_u32::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_u32(a, vshrn_n_u64::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { - unsafe { transmute(a) } +pub 
fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v8i8" + )] + fn _vsli_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t; + } + unsafe { _vsli_n_s8(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v16i8" + )] + fn _vsliq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t; } + unsafe { _vsliq_n_s8(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] 
+#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v4i16" + )] + fn _vsli_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t; + } + unsafe { _vsli_n_s16(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v8i16" + )] + fn _vsliq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t; } + unsafe { _vsliq_n_s16(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] #[inline] 
-#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 0 && N <= 31); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v2i32" + )] + fn _vsli_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t; + } + unsafe { _vsli_n_s32(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 31); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v4i32" + )] + fn _vsliq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t; } + unsafe { _vsliq_n_s32(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v1i64" + )] + fn _vsli_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t; + } + unsafe { _vsli_n_s64(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) +pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v2i64" + )] + fn _vsliq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> 
int64x2_t; } + unsafe { _vsliq_n_s64(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] 
+#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vsli_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vsliq_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { 
- unsafe { transmute(a) } +pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } } -#[doc = 
"Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { - unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vsli_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"] 
#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vsliq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] +#[doc = "Shift left and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { - unsafe { transmute(a) } +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] +pub fn vslid_n_s64(a: i64, b: i64) -> i64 { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] +#[doc = "Shift left and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> 
float64x2_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] +pub fn vslid_n_u64(a: u64, b: u64) -> u64 { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_u64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] +#[doc = "SM3PARTW1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3partw1" + )] + fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsm3partw1q_u32(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] +#[doc = "SM3PARTW1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, 
assert_instr(nop))] -pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3partw1" + )] + fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3partw1q_u32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"] +#[doc = "SM3PARTW2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t { - unsafe { transmute(a) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw2))] 
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3partw2" + )] + fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsm3partw2q_u32(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"] +#[doc = "SM3PARTW2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t { - unsafe { transmute(a) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw2))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3partw2" + )] + fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3partw2q_u32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] +#[doc = "SM3SS1"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3ss1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3ss1" + )] + fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsm3ss1q_u32(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] +#[doc = "SM3SS1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3ss1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3ss1" + )] + fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); + let a: uint32x4_t = simd_shuffle!(a, a, 
[3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3ss1q_u32(a, b, c); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] +#[doc = "SM3TT1A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3tt1a" + )] + fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { _vsm3tt1aq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] +#[doc = "SM3TT1A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { +#[target_feature(enable = "neon,sm4")] 
+#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3tt1a" + )] + fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt1aq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] +#[doc = "SM3TT1B"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3tt1b" + )] + fn _vsm3tt1bq_u32(a: 
uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { _vsm3tt1bq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] +#[doc = "SM3TT1B"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3tt1b" + )] + fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt1bq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] +#[doc = "SM3TT2A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"] #[inline] #[cfg(target_endian = "little")] 
-#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3tt2a" + )] + fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { _vsm3tt2aq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] +#[doc = "SM3TT2A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3tt2a" + )] + fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint64x2_t 
= transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt2aq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"] +#[doc = "SM3TT2B"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32x_f32(a: float32x2_t) -> float32x2_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32x.v2f32" + link_name = "llvm.aarch64.crypto.sm3tt2b" )] - fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t; + fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; } - unsafe { _vrnd32x_f32(a) } + unsafe { _vsm3tt2bq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"] +#[doc = "SM3TT2B"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32x.v4f32" + link_name = "llvm.aarch64.crypto.sm3tt2b" )] - fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t; + fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt2bq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrnd32xq_f32(a) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"] +#[doc = "SM4 key"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] 
+#[cfg_attr(test, assert_instr(sm4ekey))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32x.v2f64" + link_name = "llvm.aarch64.crypto.sm4ekey" )] - fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t; + fn _vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } - unsafe { _vrnd32xq_f64(a) } + unsafe { _vsm4ekeyq_u32(a, b) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"] +#[doc = "SM4 key"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4ekey))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint32x.f64" + link_name = "llvm.aarch64.crypto.sm4ekey" )] - fn _vrnd32x_f64(a: f64) -> f64; + fn _vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm4ekeyq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { transmute(_vrnd32x_f64(vget_lane_f64::<0>(a))) } } -#[doc = "Floating-point round to 32-bit integer toward 
zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"] +#[doc = "SM4 encode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32z_f32(a: float32x2_t) -> float32x2_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4e))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32z.v2f32" + link_name = "llvm.aarch64.crypto.sm4e" )] - fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t; + fn _vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } - unsafe { _vrnd32z_f32(a) } + unsafe { _vsm4eq_u32(a, b) } } -#[doc = "Floating-point round to 32-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"] +#[doc = "SM4 encode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4e))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - 
link_name = "llvm.aarch64.neon.frint32z.v4f32" + link_name = "llvm.aarch64.crypto.sm4e" )] - fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t; + fn _vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm4eq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrnd32zq_f32(a) } } -#[doc = "Floating-point round to 32-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u8)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32z.v2f64" + link_name = "llvm.aarch64.neon.usqadd.v8i8" )] - fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t; + fn _vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; } - unsafe { _vrnd32zq_f64(a) } + unsafe { _vsqadd_u8(a, b) } } -#[doc = "Floating-point round to 32-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u8)"] #[inline] 
-#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint32z.f64" + link_name = "llvm.aarch64.neon.usqadd.v16i8" )] - fn _vrnd32z_f64(a: f64) -> f64; + fn _vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; } - unsafe { transmute(_vrnd32z_f64(vget_lane_f64::<0>(a))) } + unsafe { _vsqaddq_u8(a, b) } } -#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u16)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64x_f32(a: float32x2_t) -> float32x2_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64x.v2f32" + link_name = "llvm.aarch64.neon.usqadd.v4i16" )] - fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t; + fn _vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; } - unsafe { _vrnd64x_f32(a) } + unsafe { _vsqadd_u16(a, b) } } -#[doc = 
"Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u16)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64x.v4f32" + link_name = "llvm.aarch64.neon.usqadd.v8i16" )] - fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t; + fn _vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; } - unsafe { _vrnd64xq_f32(a) } + unsafe { _vsqaddq_u16(a, b) } } -#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { unsafe 
extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64x.v2f64" + link_name = "llvm.aarch64.neon.usqadd.v2i32" )] - fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t; + fn _vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; } - unsafe { _vrnd64xq_f64(a) } + unsafe { _vsqadd_u32(a, b) } } -#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint64x.f64" + link_name = "llvm.aarch64.neon.usqadd.v4i32" )] - fn _vrnd64x_f64(a: f64) -> f64; + fn _vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; } - unsafe { transmute(_vrnd64x_f64(vget_lane_f64::<0>(a))) } + unsafe { _vsqaddq_u32(a, b) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u64)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = 
"stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64z_f32(a: float32x2_t) -> float32x2_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64z.v2f32" + link_name = "llvm.aarch64.neon.usqadd.v1i64" )] - fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t; + fn _vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; } - unsafe { _vrnd64z_f32(a) } + unsafe { _vsqadd_u64(a, b) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u64)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64z.v4f32" + link_name = "llvm.aarch64.neon.usqadd.v2i64" )] - fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t; + fn _vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; } - unsafe { _vrnd64zq_f32(a) } + unsafe { _vsqaddq_u64(a, b) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"] +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usqadd))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsqaddb_u8(a: u8, b: i8) -> u8 { + vget_lane_u8::<0>(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b))) +} +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usqadd))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsqaddh_u16(a: u16, b: i16) -> u16 { + vget_lane_u16::<0>(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b))) +} +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usqadd))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsqaddd_u64(a: u64, b: i64) -> u64 { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64z.v2f64" + link_name = "llvm.aarch64.neon.usqadd.i64" )] - fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t; + fn _vsqaddd_u64(a: u64, b: i64) -> u64; } - unsafe { _vrnd64zq_f64(a) } + unsafe { _vsqaddd_u64(a, b) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"] +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usqadd))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsqadds_u32(a: u32, b: i32) -> u32 { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint64z.f64" + link_name = "llvm.aarch64.neon.usqadd.i32" )] - fn _vrnd64z_f64(a: f64) -> f64; + fn _vsqadds_u32(a: u32, b: i32) -> u32; } - unsafe { transmute(_vrnd64z_f64(vget_lane_f64::<0>(a))) } + unsafe { _vsqadds_u32(a, b) } } -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f16)"] #[inline] +#[cfg_attr(test, assert_instr(fsqrt))] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrnd_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_trunc(a) } +pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"] +#[doc = "Calculates the square root of each lane."] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f16)"] #[inline] +#[cfg_attr(test, assert_instr(fsqrt))] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_trunc(a) } +pub fn vsqrtq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrnd_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_trunc(a) } +pub fn vsqrt_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_trunc(a) } +pub fn vsqrtq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrnd_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_trunc(a) } +pub fn vsqrt_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_trunc(a) } +pub fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"] +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrth_f16)"] #[inline] #[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrnda_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_round(a) } 
+#[cfg_attr(test, assert_instr(fsqrt))] +pub fn vsqrth_f16(a: f16) -> f16 { + sqrtf16(a) } -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_round(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { super::shift_right_and_insert!(u8, 8, N, a, b) } } -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrnda_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_round(a) } +pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { super::shift_right_and_insert!(u8, 16, N, a, b) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { super::shift_right_and_insert!(u16, 4, N, a, b) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { super::shift_right_and_insert!(u16, 8, N, a, b) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { super::shift_right_and_insert!(u32, 2, N, a, b) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { super::shift_right_and_insert!(u32, 4, N, a, b) } +} +#[doc = "Shift Right 
and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { super::shift_right_and_insert!(u64, 1, N, a, b) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { super::shift_right_and_insert!(u64, 2, N, a, b) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { 
transmute(vsriq_n_s8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndaq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_round(a) } +pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrnda_f64(a: 
float64x1_t) -> float64x1_t { - unsafe { simd_round(a) } +pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { transmute(vsri_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndaq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_round(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndah_f16(a: f16) -> f16 { - roundf16(a) -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndh_f16(a: f16) -> f16 { - truncf16(a) -} -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] 
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndi_f16(a: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v4f16" - )] - fn _vrndi_f16(a: float16x4_t) -> float16x4_t; - } - unsafe { _vrndi_f16(a) } -} -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v8f16" - )] - fn _vrndiq_f16(a: float16x8_t) -> float16x8_t; - } - unsafe { _vrndiq_f16(a) } +pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { transmute(vsriq_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndi_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.nearbyint.v2f32" - )] - fn _vrndi_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrndi_f32(a) } +pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndiq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v4f32" - )] - fn _vrndiq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrndiq_f32(a) } +pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndi_f64(a: float64x1_t) -> float64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", 
target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v1f64" - )] - fn _vrndi_f64(a: float64x1_t) -> float64x1_t; - } - unsafe { _vrndi_f64(a) } +pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndiq_f64(a: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v2f64" - )] - fn _vrndiq_f64(a: float64x2_t) -> float64x2_t; - } - unsafe { _vrndiq_f64(a) } -} -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndih_f16(a: f16) -> f16 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.f16" - )] - fn _vrndih_f16(a: f16) -> f16; - } - unsafe { _vrndih_f16(a) } +pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vsriq_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, 
toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndm_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_floor(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_floor(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndm_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_floor(a) } +pub fn vsri_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_floor(a) } +pub fn vsriq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"] +#[doc = "Shift right and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64)"] #[inline] #[target_feature(enable = 
"neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndm_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_floor(a) } +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] +pub fn vsrid_n_s64(a: i64, b: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"] +#[doc = "Shift right and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_floor(a) } +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] +pub fn vsrid_n_u64(a: u64, b: u64) -> u64 { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsri_n_u64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmh_f16(a: 
f16) -> f16 { - floorf16(a) -} -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndn_f64(a: float64x1_t) -> float64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.v1f64" - )] - fn _vrndn_f64(a: float64x1_t) -> float64x1_t; - } - unsafe { _vrndn_f64(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.v2f64" - )] - fn _vrndnq_f64(a: float64x2_t) -> float64x2_t; - } - unsafe { _vrndnq_f64(a) } +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] 
#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndnh_f16(a: f16) -> f16 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.f16" - )] - fn _vrndnh_f16(a: f16) -> f16; - } - unsafe { _vrndnh_f16(a) } +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndns_f32(a: f32) -> f32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.f32" - )] - fn _vrndns_f32(a: f32) -> f32; - } - unsafe { _vrndns_f32(a) } +pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = 
"1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndp_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_ceil(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_ceil(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndp_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_ceil(a) } +pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndpq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_ceil(a) } +pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndp_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_ceil(a) } +pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to 
integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_ceil(a) } +pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndph_f16(a: f16) -> f16 { - ceilf16(a) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"] +#[doc = "Store multiple 
single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndx_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_round_ties_even(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_round_ties_even(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"] 
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndx_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_round_ties_even(a) } +pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_round_ties_even(a) } +pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic 
unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndx_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_round_ties_even(a) } +pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_round_ties_even(a) } -} -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxh_f16(a: f16) -> f16 { - round_ties_even_f16(a) +pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshl))] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshld_s64(a: i64, b: i64) -> i64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.i64" - )] - fn _vrshld_s64(a: i64, b: i64) -> i64; - } - unsafe { _vrshld_s64(a, b) } +pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshl))] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshld_u64(a: u64, b: i64) -> u64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.i64" - )] - fn _vrshld_u64(a: u64, b: i64) -> u64; - } - unsafe { _vrshld_u64(a, b) } +pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four 
registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshr, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrd_n_s64(a: i64) -> i64 { - static_assert!(N >= 1 && N <= 64); - vrshld_s64(a, -N as i64) +pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshr, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrd_n_u64(a: u64) -> u64 { - static_assert!(N >= 1 && N <= 64); - vrshld_u64(a, -N as i64) +pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable 
= "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - vcombine_s8(a, vrshrn_n_s16::(b)) +pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - vcombine_s16(a, vrshrn_n_s32::(b)) +pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), 
assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - vcombine_s32(a, vrshrn_n_s64::(b)) +pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - vcombine_u8(a, vrshrn_n_u16::(b)) +pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] 
+#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - vcombine_u16(a, vrshrn_n_u32::(b)) +pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - vcombine_u32(a, vrshrn_n_u64::(b)) +pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrte))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub fn vrsqrte_f64(a: float64x1_t) -> float64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v1f64" - )] - fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t; - } - unsafe { _vrsqrte_f64(a) } +pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrte))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v2f64" - )] - fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t; - } - unsafe { _vrsqrteq_f64(a) } +pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
-#[cfg_attr(test, assert_instr(frsqrte))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrted_f64(a: f64) -> f64 { +pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.f64" + link_name = "llvm.aarch64.neon.st1x2.v1f64.p0" )] - fn _vrsqrted_f64(a: f64) -> f64; + fn _vst1_f64_x2(a: float64x1_t, b: float64x1_t, ptr: *mut f64); } - unsafe { _vrsqrted_f64(a) } + _vst1_f64_x2(b.0, b.1, a) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrte))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtes_f32(a: f32) -> f32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.f32" - )] - fn _vrsqrtes_f32(a: f32) -> f32; - } - unsafe { _vrsqrtes_f32(a) } -} -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"] -#[inline] -#[cfg_attr(test, assert_instr(frsqrte))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrteh_f16(a: f16) -> f16 { +pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch 
= "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.f16" + link_name = "llvm.aarch64.neon.st1x2.v2f64.p0" )] - fn _vrsqrteh_f16(a: f16) -> f16; + fn _vst1q_f64_x2(a: float64x2_t, b: float64x2_t, ptr: *mut f64); } - unsafe { _vrsqrteh_f16(a) } + _vst1q_f64_x2(b.0, b.1, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { +pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v1f64" + link_name = "llvm.aarch64.neon.st1x3.v1f64.p0" )] - fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; + fn _vst1_f64_x3(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64); } - unsafe { _vrsqrts_f64(a, b) } + _vst1_f64_x3(b.0, b.1, b.2, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(st1))] 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v2f64" + link_name = "llvm.aarch64.neon.st1x3.v2f64.p0" )] - fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + fn _vst1q_f64_x3(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64); } - unsafe { _vrsqrtsq_f64(a, b) } + _vst1q_f64_x3(b.0, b.1, b.2, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtsd_f64(a: f64, b: f64) -> f64 { +pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.f64" + link_name = "llvm.aarch64.neon.st1x4.v1f64.p0" )] - fn _vrsqrtsd_f64(a: f64, b: f64) -> f64; + fn _vst1_f64_x4( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, + d: float64x1_t, + ptr: *mut f64, + ); } - unsafe { _vrsqrtsd_f64(a, b) } + _vst1_f64_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or 
four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtss_f32(a: f32, b: f32) -> f32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.f32" - )] - fn _vrsqrtss_f32(a: f32, b: f32) -> f32; - } - unsafe { _vrsqrtss_f32(a, b) } -} -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr(test, assert_instr(frsqrts))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrtsh_f16(a: f16, b: f16) -> f16 { +pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.f16" + link_name = "llvm.aarch64.neon.st1x4.v2f64.p0" )] - fn _vrsqrtsh_f16(a: f16, b: f16) -> f16; + fn _vst1q_f64_x4( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, + d: float64x2_t, + ptr: *mut f64, + ); } - unsafe { _vrsqrtsh_f16(a, b) } + _vst1q_f64_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Signed rounding shift right and accumulate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] 
#[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsrad_n_s64(a: i64, b: i64) -> i64 { - static_assert!(N >= 1 && N <= 64); - let b: i64 = vrshrd_n_s64::(b); - a.wrapping_add(b) +pub unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Unsigned rounding shift right and accumulate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshr, N = 2))] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsrad_n_u64(a: u64, b: u64) -> u64 { - static_assert!(N >= 1 && N <= 64); - let b: u64 = vrshrd_n_u64::(b); - a.wrapping_add(b) -} -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - vcombine_s8(a, vrsubhn_s16(b, c)) -} -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] -#[inline] -#[target_feature(enable = 
"neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - vcombine_s16(a, vrsubhn_s32(b, c)) -} -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - vcombine_s32(a, vrsubhn_s64(b, c)) -} -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - vcombine_u8(a, vrsubhn_u16(b, c)) -} -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - vcombine_u16(a, vrsubhn_u32(b, c)) +pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - vcombine_u32(a, vrsubhn_u64(b, c)) +#[cfg_attr(test, assert_instr(stp))] +pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - vcombine_s8(a, vrsubhn_s16(b, c)) +pub unsafe fn vst2_lane_f64(a: *mut f64, b: float64x1x2_t) { + static_assert!(LANE == 0); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v1f64.p0" + )] + fn _vst2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Rounding subtract returning high narrow"] 
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - vcombine_s16(a, vrsubhn_s32(b, c)) +pub unsafe fn vst2_lane_s64(a: *mut i64, b: int64x1x2_t) { + static_assert!(LANE == 0); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v1i64.p0" + )] + fn _vst2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - vcombine_s32(a, vrsubhn_s64(b, c)) +pub unsafe fn vst2_lane_p64(a: *mut p64, b: 
poly64x1x2_t) { + static_assert!(LANE == 0); + vst2_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - vcombine_u8(a, vrsubhn_u16(b, c)) +pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { + static_assert!(LANE == 0); + vst2_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - vcombine_u16(a, vrsubhn_u32(b, c)) +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { + crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - vcombine_u32(a, vrsubhn_u64(b, c)) +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { + crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_f64(a: *mut f64, b: float64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v4f16" + link_name = "llvm.aarch64.neon.st2lane.v2f64.p0" )] - fn _vscale_f16(vn: float16x4_t, vm: int16x4_t) -> 
float16x4_t; + fn _vst2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8); } - unsafe { _vscale_f16(vn, vm) } + _vst2q_lane_f64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s8(a: *mut i8, b: int8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v8f16" + link_name = "llvm.aarch64.neon.st2lane.v16i8.p0" )] - fn _vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t; + fn _vst2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8); } - unsafe { _vscaleq_f16(vn, vm) } + _vst2q_lane_s8(b.0, b.1, LANE as i64, a as _) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = 
"stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s64(a: *mut i64, b: int64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v2f32" + link_name = "llvm.aarch64.neon.st2lane.v2i64.p0" )] - fn _vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t; + fn _vst2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8); } - unsafe { _vscale_f32(vn, vm) } + _vst2q_lane_s64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v4f32" - )] - fn _vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t; - } - unsafe { _vscaleq_f32(vn, vm) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_p64(a: *mut p64, b: poly64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v2f64" - )] - fn _vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t; - } - unsafe { _vscaleq_f64(vn, vm) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_u8(a: *mut u8, b: uint8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + vst2q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] 
#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vset_lane_f64(a: f64, b: float64x1_t) -> float64x1_t { - static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst2q_lane_u64(a: *mut u64, b: uint64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst2q_lane_p8(a: *mut p8, b: poly8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + vst2q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "SHA512 hash update part 2"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512h2))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( 
- any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512h2" - )] - fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - } - unsafe { _vsha512h2q_u64(a, b, c) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) { + vst2q_s64(transmute(a), transmute(b)) } -#[doc = "SHA512 hash update part 1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { + vst2q_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512h))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512h" - )] - fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - } - unsafe { _vsha512hq_u64(a, b, c) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn 
vst3_f64(a: *mut f64, b: float64x1x3_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "SHA512 schedule update 0"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512su0))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_f64(a: *mut f64, b: float64x1x3_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512su0" + link_name = "llvm.aarch64.neon.st3lane.v1f64.p0" )] - fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + fn _vst3_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8); } - unsafe { _vsha512su0q_u64(a, b) } + _vst3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "SHA512 schedule update 1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512su1))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn 
vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s64(a: *mut i64, b: int64x1x3_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512su1" + link_name = "llvm.aarch64.neon.st3lane.v1i64.p0" )] - fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + fn _vst3_lane_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8); } - unsafe { _vsha512su1q_u64(a, b, c) } + _vst3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshld_s64(a: i64, b: i64) -> i64 { - unsafe { transmute(vshl_s64(transmute(a), transmute(b))) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_p64(a: *mut p64, b: poly64x1x3_t) { + static_assert!(LANE == 0); + vst3_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshld_u64(a: u64, b: i64) -> u64 { - unsafe { transmute(vshl_u64(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { + static_assert!(LANE == 0); + vst3_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { - static_assert!(N >= 0 && N <= 8); - let b = vget_high_s8(a); - vshll_n_s8::(b) +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { + crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, 
target_endian = "little"), assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 16); - let b = vget_high_s16(a); - vshll_n_s16::(b) +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { + crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 32); - let b = vget_high_s32(a); - vshll_n_s32::(b) +pub unsafe fn vst3q_lane_f64(a: *mut f64, b: float64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2f64.p0" + )] + fn _vst3q_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { - static_assert!(N >= 0 && N <= 8); - let b: uint8x8_t = vget_high_u8(a); - vshll_n_u8::(b) +pub unsafe fn vst3q_lane_s8(a: *mut i8, b: int8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v16i8.p0" + )] + fn _vst3q_lane_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 16); - let b: uint16x4_t = vget_high_u16(a); - vshll_n_u16::(b) +pub unsafe fn vst3q_lane_s64(a: *mut i64, b: int64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2i64.p0" + )] + fn _vst3q_lane_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 32); - let b: uint32x2_t = vget_high_u32(a); - vshll_n_u32::(b) -} -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - vcombine_s8(a, vshrn_n_s16::(b)) +pub unsafe fn vst3q_lane_p64(a: *mut p64, b: poly64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - vcombine_s16(a, vshrn_n_s32::(b)) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_u8(a: *mut u8, b: uint8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + vst3q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - vcombine_s32(a, vshrn_n_s64::(b)) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_u64(a: *mut u64, b: uint64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_p8(a: *mut p8, b: poly8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + vst3q_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - vcombine_u8(a, vshrn_n_u16::(b)) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) { + vst3q_s64(transmute(a), transmute(b)) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - vcombine_u16(a, vshrn_n_u32::(b)) +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn 
vst3q_u64(a: *mut u64, b: uint64x2x3_t) { + vst3q_s64(transmute(a), transmute(b)) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - vcombine_u32(a, vshrn_n_u64::(b)) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); +pub unsafe fn vst4_lane_f64(a: *mut f64, b: float64x1x4_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v8i8" + link_name = "llvm.aarch64.neon.st4lane.v1f64.p0" )] - fn _vsli_n_s8(a: 
int8x8_t, b: int8x8_t, n: i32) -> int8x8_t; + fn _vst4_lane_f64( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, + d: float64x1_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsli_n_s8(a, b, N) } + _vst4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); +pub unsafe fn vst4_lane_s64(a: *mut i64, b: int64x1x4_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v16i8" + link_name = "llvm.aarch64.neon.st4lane.v1i64.p0" )] - fn _vsliq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t; + fn _vst4_lane_s64( + a: int64x1_t, + b: int64x1_t, + c: int64x1_t, + d: int64x1_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsliq_n_s8(a, b, N) } + _vst4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = 
"neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v4i16" - )] - fn _vsli_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t; - } - unsafe { _vsli_n_s16(a, b, N) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_p64(a: *mut p64, b: poly64x1x4_t) { + static_assert!(LANE == 0); + vst4_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { + static_assert!(LANE == 0); + vst4_lane_s64::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe extern 
"unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v8i16" - )] - fn _vsliq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t; - } - unsafe { _vsliq_n_s16(a, b, N) } +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { + crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 0 && N <= 31); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v2i32" - )] - fn _vsli_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t; - } - unsafe { _vsli_n_s32(a, b, N) } +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) { + crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 
1))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 31); +pub unsafe fn vst4q_lane_f64(a: *mut f64, b: float64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v4i32" + link_name = "llvm.aarch64.neon.st4lane.v2f64.p0" )] - fn _vsliq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t; + fn _vst4q_lane_f64( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, + d: float64x2_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsliq_n_s32(a, b, N) } + _vst4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 0 && N <= 63); +pub unsafe fn vst4q_lane_s8(a: *mut i8, b: int8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v1i64" + link_name = "llvm.aarch64.neon.st4lane.v16i8.p0" )] - fn _vsli_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t; + fn _vst4q_lane_s8( + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + d: 
int8x16_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsli_n_s64(a, b, N) } + _vst4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 63); +pub unsafe fn vst4q_lane_s64(a: *mut i64, b: int64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v2i64" + link_name = "llvm.aarch64.neon.st4lane.v2i64.p0" )] - fn _vsliq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t; + fn _vst4q_lane_s64( + a: int64x2_t, + b: int64x2_t, + c: int64x2_t, + d: int64x2_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsliq_n_s64(a, b, N) } + _vst4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_p64(a: *mut p64, b: poly64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_u8(a: *mut u8, b: uint8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); + vst4q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_u64(a: *mut u64, b: uint64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_p8(a: *mut p8, b: poly8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); + vst4q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] 
-pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vsli_n_s32::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) { + vst4q_s64(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vsliq_n_s32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) { + vst4q_s64(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since 
= "1.59.0")] -pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_f64(ptr: *mut f64, val: float64x1_t) { + static_assert!(LANE == 0); + vstl1_lane_s64::(ptr as *mut i64, transmute(val)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_f64(ptr: *mut f64, val: float64x2_t) { + static_assert_uimm_bits!(LANE, 1); + vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_u64)"] 
+#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_u64(ptr: *mut u64, val: uint64x1_t) { + static_assert!(LANE == 0); + vstl1_lane_s64::(ptr as *mut i64, transmute(val)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_u64(ptr: *mut u64, val: uint64x2_t) { + static_assert_uimm_bits!(LANE, 1); + vstl1q_lane_s64::(ptr as 
*mut i64, transmute(val)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_p64(ptr: *mut p64, val: poly64x1_t) { + static_assert!(LANE == 0); + vstl1_lane_s64::(ptr as *mut i64, transmute(val)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] -pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_p64(ptr: *mut p64, val: poly64x2_t) { + static_assert_uimm_bits!(LANE, 1); + vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_s64(ptr: *mut i64, val: int64x1_t) { + static_assert!(LANE == 0); + let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; + let lane: i64 = vget_lane_s64::(val); + (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"] +#[doc = "Store-Release a single-element structure from one lane of one 
register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] #[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_s64(ptr: *mut i64, val: int64x2_t) { + static_assert_uimm_bits!(LANE, 1); + let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; + let lane: i64 = vgetq_lane_s64::(val); + (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release) +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"] +#[inline] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(fsub))] +pub fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + unsafe { simd_sub(a, b) } } -#[doc = "Shift left and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] -pub fn vslid_n_s64(a: i64, b: i64) -> i64 { - static_assert!(N >= 0 && N <= 
63); - unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(fsub))] +pub fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Shift left and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] -pub fn vslid_n_u64(a: u64, b: u64) -> u64 { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsli_n_u64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(sub))] +pub fn vsubd_s64(a: i64, b: i64) -> i64 { + a.wrapping_sub(b) } -#[doc = "SM3PARTW1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3partw1))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3partw1" - )] - fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsm3partw1q_u32(a, b, c) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sub))] +pub fn vsubd_u64(a: u64, b: u64) -> u64 { + a.wrapping_sub(b) } -#[doc = "SM3PARTW2"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3partw2))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3partw2" - )] - fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsm3partw2q_u32(a, b, c) } +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fsub))] +pub fn vsubh_f16(a: f16, b: f16) -> f16 { + a - b } -#[doc = "SM3SS1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3ss1))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3ss1" - )] - fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] +pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + unsafe { + let c: int16x8_t = 
simd_cast(vget_high_s8(a)); + let d: int16x8_t = simd_cast(vget_high_s8(b)); + simd_sub(c, d) } - unsafe { _vsm3ss1q_u32(a, b, c) } } -#[doc = "SM3TT1A"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt1a" - )] - fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] +pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + unsafe { + let c: int32x4_t = simd_cast(vget_high_s16(a)); + let d: int32x4_t = simd_cast(vget_high_s16(b)); + simd_sub(c, d) } - unsafe { _vsm3tt1aq_u32(a, b, c, IMM2 as i64) } } -#[doc = "SM3TT1B"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); 
- unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt1b" - )] - fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] +pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + unsafe { + let c: int64x2_t = simd_cast(vget_high_s32(a)); + let d: int64x2_t = simd_cast(vget_high_s32(b)); + simd_sub(c, d) } - unsafe { _vsm3tt1bq_u32(a, b, c, IMM2 as i64) } } -#[doc = "SM3TT2A"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt2a" - )] - fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] +pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + unsafe { + let c: uint16x8_t = simd_cast(vget_high_u8(a)); + let d: uint16x8_t = simd_cast(vget_high_u8(b)); + simd_sub(c, d) } - unsafe { _vsm3tt2aq_u32(a, b, c, IMM2 as i64) } } -#[doc = "SM3TT2B"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt2b" - )] - fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] +pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + unsafe { + let c: uint32x4_t = simd_cast(vget_high_u16(a)); + let d: uint32x4_t = simd_cast(vget_high_u16(b)); + simd_sub(c, d) } - unsafe { _vsm3tt2bq_u32(a, b, c, IMM2 as i64) } } -#[doc = "SM4 key"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm4ekey))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm4ekey" - )] - fn _vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; +#[target_feature(enable = "neon")] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] +pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + unsafe { + let c: uint64x2_t = simd_cast(vget_high_u32(a)); + let d: uint64x2_t = simd_cast(vget_high_u32(b)); + simd_sub(c, d) } - unsafe { _vsm4ekeyq_u32(a, b) } } -#[doc = "SM4 encode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm4e))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm4e" - )] - fn _vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsm4eq_u32(a, b) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] +pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let c = vget_high_s8(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u8)"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - 
any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v8i8" - )] - fn _vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } - unsafe { _vsqadd_u8(a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] +pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let c = vget_high_s16(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u8)"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v16i8" - )] - fn _vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } - unsafe { _vsqaddq_u8(a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] +pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let c = vget_high_s32(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u16)"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = 
"aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v4i16" - )] - fn _vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } - unsafe { _vsqadd_u16(a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] +pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let c = vget_high_u8(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u16)"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v8i16" - )] - fn _vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } - unsafe { _vsqaddq_u16(a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] +pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let c = vget_high_u16(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u32)"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = 
"aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v2i32" - )] - fn _vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } - unsafe { _vsqadd_u32(a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] +pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let c = vget_high_u32(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u32)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v4i32" - )] - fn _vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } - unsafe { _vsqaddq_u32(a, b) } +pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vqtbl1_s8(vcombine_s8(a, unsafe { crate::mem::zeroed() }), unsafe { + { + transmute(b) + } + }) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", 
target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v1i64" - )] - fn _vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vsqadd_u64(a, b) } +pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + vqtbl1_u8(vcombine_u8(a, unsafe { crate::mem::zeroed() }), b) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v2i64" - )] - fn _vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vsqaddq_u64(a, b) } +pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + vqtbl1_p8(vcombine_p8(a, unsafe { crate::mem::zeroed() }), b) } -#[doc = "Unsigned saturating accumulate of signed value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqaddb_u8(a: u8, b: i8) -> u8 { - vget_lane_u8::<0>(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b))) +pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vqtbl1_s8(vcombine_s8(a.0, a.1), vreinterpret_u8_s8(b)) } -#[doc = "Unsigned saturating accumulate of signed 
value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqaddh_u16(a: u16, b: i16) -> u16 { - vget_lane_u16::<0>(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b))) +pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + vqtbl1_u8(vcombine_u8(a.0, a.1), b) } -#[doc = "Unsigned saturating accumulate of signed value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqaddd_u64(a: u64, b: i64) -> u64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.i64" - )] - fn _vsqaddd_u64(a: u64, b: i64) -> u64; - } - unsafe { _vsqaddd_u64(a, b) } +pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + vqtbl1_p8(vcombine_p8(a.0, a.1), b) } -#[doc = "Unsigned saturating accumulate of signed value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", 
since = "1.59.0")] -pub fn vsqadds_u32(a: u32, b: i32) -> u32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.i32" - )] - fn _vsqadds_u32(a: u32, b: i32) -> u32; - } - unsafe { _vsqadds_u32(a, b) } +pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + let x = int8x16x2_t( + vcombine_s8(a.0, a.1), + vcombine_s8(a.2, unsafe { crate::mem::zeroed() }), + ); + vqtbl2_s8(x, vreinterpret_u8_s8(b)) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f16)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] #[inline] -#[cfg_attr(test, assert_instr(fsqrt))] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_fsqrt(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t( + vcombine_u8(a.0, a.1), + vcombine_u8(a.2, unsafe { crate::mem::zeroed() }), + ); + vqtbl2_u8(x, b) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f16)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] #[inline] -#[cfg_attr(test, assert_instr(fsqrt))] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsqrtq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_fsqrt(a) } +#[target_feature(enable = "neon")] 
+#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t( + vcombine_p8(a.0, a.1), + vcombine_p8(a.2, unsafe { crate::mem::zeroed() }), + ); + vqtbl2_p8(x, b) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrt_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_fsqrt(a) } +pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3)); + vqtbl2_s8(x, vreinterpret_u8_s8(b)) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrtq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_fsqrt(a) } +pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3)); + vqtbl2_u8(x, b) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrt_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_fsqrt(a) } +pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3)); + vqtbl2_p8(x, b) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_fsqrt(a) } -} -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrth_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(fsqrt))] -pub fn vsqrth_f16(a: f16) -> f16 { - sqrtf16(a) +pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(8))), + vqtbx1_s8( + a, + vcombine_s8(b, crate::mem::zeroed()), + vreinterpret_u8_s8(c), + ), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { super::shift_right_and_insert!(u8, 8, N, a, b) } +pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(8))), + vqtbx1_u8(a, vcombine_u8(b, crate::mem::zeroed()), c), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { super::shift_right_and_insert!(u8, 16, N, a, b) } +pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(8))), + vqtbx1_p8(a, vcombine_p8(b, crate::mem::zeroed()), c), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] 
-#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { super::shift_right_and_insert!(u16, 4, N, a, b) } +pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(16))), + vqtbx1_s8(a, vcombine_s8(b.0, b.1), vreinterpret_u8_s8(c)), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { super::shift_right_and_insert!(u16, 8, N, a, b) } +pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(16))), + vqtbx1_u8(a, vcombine_u8(b.0, b.1), c), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - 
static_assert!(N >= 1 && N <= 32); - unsafe { super::shift_right_and_insert!(u32, 2, N, a, b) } +pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(16))), + vqtbx1_p8(a, vcombine_p8(b.0, b.1), c), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { super::shift_right_and_insert!(u32, 4, N, a, b) } +pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + let x = int8x16x2_t( + vcombine_s8(b.0, b.1), + vcombine_s8(b.2, unsafe { crate::mem::zeroed() }), + ); + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(24))), + vqtbx2_s8(a, x, vreinterpret_u8_s8(c)), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { super::shift_right_and_insert!(u64, 1, N, a, b) } +pub fn 
vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t( + vcombine_u8(b.0, b.1), + vcombine_u8(b.2, unsafe { crate::mem::zeroed() }), + ); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(24))), + vqtbx2_u8(a, x, c), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { super::shift_right_and_insert!(u64, 2, N, a, b) } +pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t( + vcombine_p8(b.0, b.1), + vcombine_p8(b.2, unsafe { crate::mem::zeroed() }), + ); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(24))), + vqtbx2_p8(a, x, c), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: 
int8x8_t) -> int8x8_t { + let x = int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3)); + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(32))), + vqtbx2_s8(a, x, vreinterpret_u8_s8(c)), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsriq_n_s8::(transmute(a), transmute(b))) } +pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3)); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(32))), + vqtbx2_u8(a, x, c), + a, + ) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } +pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t(vcombine_p8(b.0, b.1), 
vcombine_p8(b.2, b.3)); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(32))), + vqtbx2_p8(a, x, c), + a, + ) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { transmute(vsri_n_s32::(transmute(a), transmute(b))) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { transmute(vsriq_n_s32::(transmute(a), transmute(b))) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { 
+ unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn 
vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsriq_n_s8::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_f64(a: float64x2_t, b: 
float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } -} -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"] 
-#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Shift right and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"] #[inline] +#[cfg(target_endian 
= "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] -pub fn vsrid_n_s64(a: i64, b: i64) -> i64 { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Shift right and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] -pub fn vsrid_n_u64(a: u64, b: u64) -> u64 { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_u64::(transmute(a), transmute(b))) } -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { - 
crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
-#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"] #[inline] +#[cfg(target_endian = 
"little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] -#[doc = "## Safety"] -#[doc = " * 
Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: 
int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t 
= simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures 
from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 
0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let 
ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u8(a: uint8x16_t, b: 
uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
-#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v1f64.p0" - )] - fn _vst1_f64_x2(a: float64x1_t, b: float64x1_t, ptr: *mut f64); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - _vst1_f64_x2(b.0, b.1, a) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.st1x2.v2f64.p0" - )] - fn _vst1q_f64_x2(a: float64x2_t, b: float64x2_t, ptr: *mut f64); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) } - _vst1q_f64_x2(b.0, b.1, a) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v1f64.p0" - )] - fn _vst1_f64_x3(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - _vst1_f64_x3(b.0, b.1, b.2, a) } -#[doc = "Store multiple single-element structures to one, two, three, or 
four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2f64.p0" - )] - fn _vst1q_f64_x3(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64); - } - _vst1q_f64_x3(b.0, b.1, b.2, a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v1f64.p0" - )] - fn _vst1_f64_x4( - a: float64x1_t, - b: float64x1_t, - c: float64x1_t, - d: float64x1_t, - ptr: *mut f64, - ); +#[cfg_attr( + all(test, 
not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - _vst1_f64_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2f64.p0" - )] - fn _vst1q_f64_x4( - a: float64x2_t, - b: float64x2_t, - c: float64x2_t, - d: float64x2_t, - ptr: *mut f64, - ); - } - _vst1q_f64_x4(b.0, b.1, b.2, b.3, a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, 
[3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(stp))] -pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { - core::ptr::write_unaligned(a.cast(), b) +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_f64(a: *mut f64, b: float64x1x2_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v1f64.p0" - )] - fn _vst2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8); - } - _vst2_lane_f64(b.0, b.1, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"] -#[doc = "## 
Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s64(a: *mut i64, b: int64x1x2_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v1i64.p0" - )] - fn _vst2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - _vst2_lane_s64(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_p64(a: *mut p64, b: poly64x1x2_t) { - static_assert!(LANE == 0); - vst2_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose 
vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { - static_assert!(LANE == 0); - vst2_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { - crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic 
unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { - crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_f64(a: *mut f64, b: float64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2f64.p0" - )] - fn _vst2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - _vst2q_lane_f64(b.0, b.1, LANE 
as i64, a as _) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s8(a: *mut i8, b: int8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v16i8.p0" - )] - fn _vst2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_s8(b.0, b.1, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s64(a: *mut i64, b: int64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch 
= "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2i64.p0" - )] - fn _vst2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_s64(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_p64(a: *mut p64, b: poly64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - vst2q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_u8(a: *mut u8, b: uint8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - vst2q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + 
assert_instr(zip2) +)] +pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_u64(a: *mut u64, b: uint64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - vst2q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_p8(a: *mut p8, b: poly8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - vst2q_lane_s8::(transmute(a), 
transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) { - vst2q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { - vst2q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = 
"msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) { - core::ptr::write_unaligned(a.cast(), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_f64(a: *mut f64, b: float64x1x3_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = 
"arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v1f64.p0" - )] - fn _vst3_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8); - } - _vst3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s64(a: *mut i64, b: int64x1x3_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v1i64.p0" - )] - fn _vst3_lane_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - _vst3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3_lane_p64(a: *mut p64, b: poly64x1x3_t) { - static_assert!(LANE == 0); - vst3_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { - static_assert!(LANE == 0); - vst3_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = 
simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { - crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = 
simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { - crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_f64(a: *mut f64, b: float64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2f64.p0" - )] - fn _vst3q_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t 
= simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - _vst3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s8(a: *mut i8, b: int8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v16i8.p0" - )] - fn _vst3q_lane_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, 
LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s64(a: *mut i64, b: int64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2i64.p0" - )] - fn _vst3q_lane_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - _vst3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_p64(a: *mut p64, b: poly64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = 
"neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_u8(a: *mut u8, b: uint8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - vst3q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_u64(a: *mut u64, b: uint64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_p8(a: *mut p8, b: poly8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - vst3q_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) { - vst3q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) { - vst3q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + 
let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) { - core::ptr::write_unaligned(a.cast(), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_f64(a: *mut f64, b: float64x1x4_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = 
"arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v1f64.p0" - )] - fn _vst4_lane_f64( - a: float64x1_t, - b: float64x1_t, - c: float64x1_t, - d: float64x1_t, - n: i64, - ptr: *mut i8, +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - _vst4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s64(a: *mut i64, b: int64x1x4_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v1i64.p0" - )] - fn _vst4_lane_s64( - a: int64x1_t, - b: int64x1_t, - c: int64x1_t, - d: int64x1_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn 
vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4_lane_p64(a: *mut p64, b: poly64x1x4_t) { - static_assert!(LANE == 0); - vst4_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { - static_assert!(LANE == 0); - 
vst4_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { - crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn 
vst4q_s64(a: *mut i64, b: int64x2x4_t) { - crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_f64(a: *mut f64, b: float64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2f64.p0" - )] - fn _vst4q_lane_f64( - a: float64x2_t, - b: float64x2_t, - c: float64x2_t, - d: float64x2_t, - n: i64, - ptr: *mut i8, - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - _vst4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * 
Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s8(a: *mut i8, b: int8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v16i8.p0" - )] - fn _vst4q_lane_s8( - a: int8x16_t, - b: int8x16_t, - c: int8x16_t, - d: int8x16_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s64(a: *mut i64, b: int64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2i64.p0" - )] - fn _vst4q_lane_s64( - a: int64x2_t, - b: int64x2_t, - c: int64x2_t, - d: int64x2_t, - n: i64, 
- ptr: *mut i8, - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - _vst4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_p64(a: *mut p64, b: poly64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"] #[inline] 
+#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_u8(a: *mut u8, b: uint8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - vst4q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_u64(a: *mut u64, b: uint64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_p8(a: *mut p8, b: poly8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - vst4q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) { - vst4q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 
15]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) { - vst4q_s64(transmute(a), transmute(b)) -} -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] -#[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_f64(ptr: *mut f64, val: float64x1_t) { - static_assert!(LANE == 0); - vstl1_lane_s64::(ptr as *mut i64, transmute(val)) -} -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] -#[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn 
vstl1q_lane_f64(ptr: *mut f64, val: float64x2_t) { - static_assert_uimm_bits!(LANE, 1); - vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) -} -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] -#[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_u64(ptr: *mut u64, val: uint64x1_t) { - static_assert!(LANE == 0); - vstl1_lane_s64::(ptr as *mut i64, transmute(val)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] 
-#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1q_lane_u64(ptr: *mut u64, val: uint64x2_t) { - static_assert_uimm_bits!(LANE, 1); - vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe { + let c: int64x1_t = simd_and(a, b); + let d: i64x1 = i64x1::new(0); + simd_ne(c, transmute(d)) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_p64(ptr: *mut p64, val: poly64x1_t) { - static_assert!(LANE == 0); - vstl1_lane_s64::(ptr as *mut i64, transmute(val)) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe { + let c: int64x2_t = simd_and(a, b); + let d: i64x2 = i64x2::new(0, 0); + simd_ne(c, transmute(d)) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_p64)"] -#[doc = "## Safety"] 
-#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1q_lane_p64(ptr: *mut p64, val: poly64x2_t) { - static_assert_uimm_bits!(LANE, 1); - vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t { + unsafe { + let c: poly64x1_t = simd_and(a, b); + let d: i64x1 = i64x1::new(0); + simd_ne(c, transmute(d)) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_s64(ptr: *mut i64, val: int64x1_t) { - static_assert!(LANE == 0); - let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; - let lane: i64 = vget_lane_s64::(val); - (*atomic_dst).store(transmute(lane), 
crate::sync::atomic::Ordering::Release) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t { + unsafe { + let c: poly64x2_t = simd_and(a, b); + let d: i64x2 = i64x2::new(0, 0); + simd_ne(c, transmute(d)) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1q_lane_s64(ptr: *mut i64, val: int64x2_t) { - static_assert_uimm_bits!(LANE, 1); - let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; - let lane: i64 = vgetq_lane_s64::(val); - (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { + let c: uint64x1_t = simd_and(a, b); + let d: u64x1 = u64x1::new(0); + simd_ne(c, transmute(d)) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fsub))] -pub fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"] +pub fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let c: uint64x2_t = simd_and(a, b); + let d: u64x2 = u64x2::new(0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Compare bitwise test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fsub))] -pub fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_sub(a, b) } +pub fn vtstd_s64(a: i64, b: i64) -> u64 { + unsafe { transmute(vtst_s64(transmute(a), transmute(b))) } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"] +#[doc = "Compare bitwise test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sub))] -pub fn vsubd_s64(a: i64, b: i64) -> i64 { - a.wrapping_sub(b) +pub fn vtstd_u64(a: u64, b: u64) -> u64 { + unsafe { transmute(vtst_u64(transmute(a), transmute(b))) } } -#[doc = "Subtract"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sub))] -pub fn vsubd_u64(a: u64, b: u64) -> u64 { - a.wrapping_sub(b) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v8i8" + )] + fn _vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t; + } + unsafe { _vuqadd_s8(a, b) } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(fsub))] -pub fn vsubh_f16(a: f16, b: f16) -> f16 { - a - b +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v16i8" + )] + fn _vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t; + } + unsafe { _vuqaddq_s8(a, b) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"] +#[doc = "Signed saturating 
Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] -pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { - unsafe { - let c: int16x8_t = simd_cast(vget_high_s8(a)); - let d: int16x8_t = simd_cast(vget_high_s8(b)); - simd_sub(c, d) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v4i16" + )] + fn _vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t; } + unsafe { _vuqadd_s16(a, b) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] -pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - unsafe { - let c: int32x4_t = simd_cast(vget_high_s16(a)); - let d: int32x4_t = simd_cast(vget_high_s16(b)); - simd_sub(c, d) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v8i16" + )] + fn _vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t; } + unsafe { _vuqaddq_s16(a, b) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] -pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - unsafe { - let c: int64x2_t = simd_cast(vget_high_s32(a)); - let d: int64x2_t = simd_cast(vget_high_s32(b)); - simd_sub(c, d) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v2i32" + )] + fn _vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t; } + unsafe { _vuqadd_s32(a, b) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] -pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - unsafe { - let c: uint16x8_t = simd_cast(vget_high_u8(a)); - let d: uint16x8_t = simd_cast(vget_high_u8(b)); - simd_sub(c, d) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v4i32" + )] + fn _vuqaddq_s32(a: int32x4_t, b: uint32x4_t) 
-> int32x4_t; } + unsafe { _vuqaddq_s32(a, b) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] -pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - unsafe { - let c: uint32x4_t = simd_cast(vget_high_u16(a)); - let d: uint32x4_t = simd_cast(vget_high_u16(b)); - simd_sub(c, d) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v1i64" + )] + fn _vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t; } + unsafe { _vuqadd_s64(a, b) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] -pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - unsafe { - let c: uint64x2_t = simd_cast(vget_high_u32(a)); - let d: uint64x2_t = simd_cast(vget_high_u32(b)); - simd_sub(c, d) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v2i64" + )] + fn _vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t; } + unsafe { _vuqaddq_s64(a, b) } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] -pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - let c = vget_high_s8(b); - unsafe { simd_sub(a, simd_cast(c)) } +pub fn vuqaddb_s8(a: i8, b: u8) -> i8 { + vget_lane_s8::<0>(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b))) } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] -pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - let c = vget_high_s16(b); - unsafe { simd_sub(a, simd_cast(c)) } +pub fn vuqaddh_s16(a: i16, b: u16) -> i16 { + vget_lane_s16::<0>(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b))) } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] -pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - let c = vget_high_s32(b); - unsafe { simd_sub(a, simd_cast(c)) } +pub fn vuqaddd_s64(a: i64, b: u64) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.i64" + )] + fn _vuqaddd_s64(a: i64, b: u64) -> i64; + } + unsafe { _vuqaddd_s64(a, b) } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] -pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - let c = vget_high_u8(b); - unsafe { simd_sub(a, simd_cast(c)) } +pub fn vuqadds_s32(a: i32, b: u32) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.i32" + )] + fn _vuqadds_s32(a: i32, b: u32) -> i32; + } + unsafe { _vuqadds_s32(a, b) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] 
+#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] -pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - let c = vget_high_u16(b); - unsafe { simd_sub(a, simd_cast(c)) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Unsigned Subtract 
Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] -pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - let c = vget_high_u32(b); - unsafe { simd_sub(a, simd_cast(c)) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vqtbl1_s8(vcombine_s8(a, unsafe { crate::mem::zeroed() }), unsafe { - { - transmute(b) - } - }) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe 
{ simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - vqtbl1_u8(vcombine_u8(a, unsafe { crate::mem::zeroed() }), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - vqtbl1_p8(vcombine_p8(a, unsafe { crate::mem::zeroed() }), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { - vqtbl1_s8(vcombine_s8(a.0, a.1), vreinterpret_u8_s8(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - vqtbl1_u8(vcombine_u8(a.0, a.1), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn 
vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - vqtbl1_p8(vcombine_p8(a.0, a.1), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { - let x = int8x16x2_t( - vcombine_s8(a.0, a.1), - vcombine_s8(a.2, unsafe { crate::mem::zeroed() }), - ); - vqtbl2_s8(x, vreinterpret_u8_s8(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { - let x = uint8x16x2_t( - vcombine_u8(a.0, a.1), - vcombine_u8(a.2, unsafe { crate::mem::zeroed() }), - ); - vqtbl2_u8(x, b) +#[cfg_attr( + all(test, 
not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { - let x = poly8x16x2_t( - vcombine_p8(a.0, a.1), - vcombine_p8(a.2, unsafe { crate::mem::zeroed() }), - ); - vqtbl2_p8(x, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { - let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3)); - vqtbl2_s8(x, vreinterpret_u8_s8(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + 
let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { - let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3)); - vqtbl2_u8(x, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { - let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3)); - vqtbl2_p8(x, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + 
} } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe { - simd_select( - simd_lt::(c, transmute(i8x8::splat(8))), - vqtbx1_s8( - a, - vcombine_s8(b, crate::mem::zeroed()), - vreinterpret_u8_s8(c), - ), - a, - ) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(8))), - vqtbx1_u8(a, vcombine_u8(b, crate::mem::zeroed()), c), - a, - ) + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { - unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(8))), - vqtbx1_p8(a, vcombine_p8(b, crate::mem::zeroed()), c), - a, - ) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { - simd_select( - simd_lt::(c, transmute(i8x8::splat(16))), - vqtbx1_s8(a, vcombine_s8(b.0, b.1), vreinterpret_u8_s8(c)), - a, - ) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(16))), - vqtbx1_u8(a, vcombine_u8(b.0, b.1), c), - a, - ) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(16))), - vqtbx1_p8(a, vcombine_p8(b.0, b.1), c), - a, - ) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { - let x = int8x16x2_t( - vcombine_s8(b.0, b.1), - vcombine_s8(b.2, unsafe { crate::mem::zeroed() }), - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { - simd_select( - simd_lt::(c, transmute(i8x8::splat(24))), - vqtbx2_s8(a, x, vreinterpret_u8_s8(c)), + simd_shuffle!( a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] ) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { - let x = uint8x16x2_t( - vcombine_u8(b.0, b.1), - vcombine_u8(b.2, unsafe { crate::mem::zeroed() }), - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(24))), - vqtbx2_u8(a, x, c), + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 
13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] ) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { - let x = poly8x16x2_t( - vcombine_p8(b.0, b.1), - vcombine_p8(b.2, unsafe { crate::mem::zeroed() }), - ); - unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(24))), - vqtbx2_p8(a, x, c), - a, - ) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { - let x = int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3)); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe { 
- simd_select( - simd_lt::(c, transmute(i8x8::splat(32))), - vqtbx2_s8(a, x, vreinterpret_u8_s8(c)), - a, - ) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { - let x = uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3)); - unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(32))), - vqtbx2_u8(a, x, c), - a, - ) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - let x = poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3)); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn 
vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(32))), - vqtbx2_p8(a, x, c), - a, - ) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } -} -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + 
assert_instr(uzp1) )] -pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(uzp1) )] -pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(uzp1) )] -pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 
0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(uzp1) )] -pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(uzp1) )] -pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, 
+ [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(uzp1) )] -pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(uzp1) )] -pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(uzp1) )] -pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] 
#[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ) + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp1) )] -pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp2) )] -pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp2) )] -pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(uzp2) )] -pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(zip2) )] -pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn1) + assert_instr(zip2) )] -pub fn 
vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(zip2) )] -pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(zip2) )] -pub fn 
vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc 
= "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( 
all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { +pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"] +#[doc = 
"Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(zip2) )] -pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(zip2) )] -pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env 
= "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(zip2) )] -pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { unsafe { - simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ) + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> 
float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 
6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_shuffle!( a, b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] ) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } 
-#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { - simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = 
"neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(trn2) + assert_instr(uzp2) )] -pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } -} -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe { - let c: int64x1_t = simd_and(a, b); - let d: i64x1 = i64x1::new(0); - simd_ne(c, transmute(d)) - } -} -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { +pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { 
unsafe { - let c: int64x2_t = simd_and(a, b); - let d: i64x2 = i64x2::new(0, 0); - simd_ne(c, transmute(d)) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t { - unsafe { - let c: poly64x1_t = simd_and(a, b); - let d: i64x1 = i64x1::new(0); - simd_ne(c, transmute(d)) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe { - let c: poly64x2_t = simd_and(a, b); - let d: i64x2 = i64x2::new(0, 
0); - simd_ne(c, transmute(d)) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { - let c: uint64x1_t = simd_and(a, b); - let d: u64x1 = u64x1::new(0); - simd_ne(c, transmute(d)) + simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { - let c: uint64x2_t = simd_and(a, b); - let d: u64x2 = 
u64x2::new(0, 0); - simd_ne(c, transmute(d)) + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Compare bitwise test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstd_s64(a: i64, b: i64) -> u64 { - unsafe { transmute(vtst_s64(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Compare bitwise test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstd_u64(a: u64, b: u64) -> u64 { - unsafe { transmute(vtst_u64(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> 
uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v8i8" - )] - fn _vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t; - } - unsafe { _vuqadd_s8(a, b) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v16i8" - )] - fn 
_vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vuqaddq_s8(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v4i16" - )] - fn _vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t; - } - unsafe { _vuqadd_s16(a, b) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since 
= "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v8i16" - )] - fn _vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vuqaddq_s16(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v2i32" - )] - fn _vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t; - } - unsafe { _vuqadd_s32(a, b) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s32)"] +#[doc = "Unzip 
vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v4i32" - )] - fn _vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vuqaddq_s32(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v1i64" - )] - fn _vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + 
unsafe { + simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ) } - unsafe { _vuqadd_s64(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v2i64" - )] - fn _vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - unsafe { _vuqaddq_s64(a, b) } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] 
+#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqaddb_s8(a: i8, b: u8) -> i8 { - vget_lane_s8::<0>(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b))) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqaddh_s16(a: i16, b: u16) -> i16 { - vget_lane_s16::<0>(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b))) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqaddd_s64(a: i64, b: u64) -> i64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.i64" - )] - fn _vuqaddd_s64(a: i64, b: u64) -> i64; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vuqaddd_s64(a, b) } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)"] +#[doc = "Exclusive OR and rotate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vxarq_u64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqadds_s32(a: i32, b: u32) -> i32 { +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(xar, IMM6 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vxarq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(IMM6, 6); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = 
"aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.i32" + link_name = "llvm.aarch64.crypto.xar" )] - fn _vuqadds_s32(a: i32, b: u32) -> i32; + fn _vxarq_u64(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t; } - unsafe { _vuqadds_s32(a, b) } + unsafe { _vxarq_u64(a, b, IMM6 as i64) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = 
simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip1) )] -pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip1) )] -pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 
5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip1) )] -pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip1) )] -pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip1) )] -pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip1) )] -pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip1) )] -pub fn 
vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { +pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe { - simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"] 
#[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { - simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ) - } +pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - 
assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: 
int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { - simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp1) + assert_instr(zip1) )] -pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = 
"stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"] #[inline] 
+#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip2) + assert_instr(zip1) )] -pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip2) + assert_instr(zip1) )] -pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip2) + assert_instr(zip1) )] -pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip2) + assert_instr(zip1) )] -pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip2) + assert_instr(zip1) )] -pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [1, 
3]) } +pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip2) + assert_instr(zip1) )] -pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip2) + assert_instr(zip1) )] -pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = 
"neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { - simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { 
simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, 
b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { unsafe { simd_shuffle!( a, b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] ) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 
13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { unsafe { - simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(uzp2) + assert_instr(zip1) )] -pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } -} -#[doc = "Exclusive OR and rotate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vxarq_u64)"] -#[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(xar, IMM6 = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vxarq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(IMM6, 6); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.xar" - )] - fn _vxarq_u64(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t; +pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - 
unsafe { _vxarq_u64(a, b, IMM6 as i64) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr( - all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) -)] -pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -} -#[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub 
fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip2q_f16(a: float16x8_t, b: 
float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] - ) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } 
#[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 
13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_shuffle!( a, b, - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] ) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -#[cfg_attr( - all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) -)] -pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } -} -#[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr( - all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) -)] -pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -} -#[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn 
vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] - ) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr( - all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) -)] -pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } -} -#[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), - assert_instr(zip1) + assert_instr(zip2) )] -pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2_f16(a: float16x4_t, b: 
float16x4_t) -> float16x4_t { +pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } +pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } +pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, 
[4, 12, 5, 13, 6, 14, 7, 15]) } +pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_shuffle!( a, @@ -25526,158 +31818,205 @@ pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } +pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t 
= simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } +pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } +pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub 
fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } +pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe { - simd_shuffle!( - a, - b, - [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] - ) + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { +pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } +pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [2, 6, 
3, 7]) } +pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( all(test, not(target_env = "msvc"), target_endian = "little"), assert_instr(zip2) )] -pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( @@ -25685,11 +32024,17 @@ pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { assert_instr(zip2) )] pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( @@ -25706,8 +32051,37 @@ pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( @@ -25718,8 +32092,27 @@ pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 
0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( @@ -25730,8 +32123,27 @@ pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr( @@ -25741,3 +32153,21 @@ pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)"] +#[inline] 
+#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 476d8e78a2..8f6ea59efd 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -3154,6 +3154,7 @@ pub fn vaddw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { #[doc = "AES single round encryption."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aesd))] @@ -3177,8 +3178,52 @@ pub fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { unsafe { _vaesdq_u8(data, key) } } #[doc = "AES single round encryption."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesd))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesd" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesd")] + fn _vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let key: uint8x16_t = simd_shuffle!( + key, + key, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaesdq_u8(data, key); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "AES single round encryption."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aese))] @@ -3201,9 +3246,53 @@ pub fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { } unsafe { _vaeseq_u8(data, key) } } +#[doc = "AES single round encryption."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aese))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aese" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")] + fn _vaeseq_u8(data: uint8x16_t, key: 
uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let key: uint8x16_t = simd_shuffle!( + key, + key, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaeseq_u8(data, key); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "AES inverse mix columns."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aesimc))] @@ -3226,9 +3315,48 @@ pub fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { } unsafe { _vaesimcq_u8(data) } } +#[doc = "AES inverse mix columns."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesimc))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesimc" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesimc")] + fn _vaesimcq_u8(data: uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaesimcq_u8(data); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "AES mix columns."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aesmc))] @@ -3251,6 +3379,44 @@ pub fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { } unsafe { _vaesmcq_u8(data) } } +#[doc = "AES mix columns."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesmc))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesmc" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesmc")] + fn _vaesmcq_u8(data: uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaesmcq_u8(data); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "Vector bitwise and"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s8)"] #[inline] @@ -7342,7 +7508,6 @@ pub fn vclzq_s32(a: int32x4_t) -> int32x4_t { #[doc = "Count leading zero bits"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] @@ -7362,35 +7527,8 @@ pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t { unsafe { transmute(vclz_s16(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(vclz_s16(transmute(a))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] @@ -7410,35 +7548,8 @@ pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { unsafe { transmute(vclzq_s16(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"] -#[inline] -#[cfg(target_endian = 
"big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(vclzq_s16(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] @@ -7458,35 +7569,8 @@ pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t { unsafe { transmute(vclz_s32(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t { - unsafe { - let a: uint32x2_t = 
simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x2_t = transmute(vclz_s32(transmute(a))); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] @@ -7506,35 +7590,8 @@ pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { unsafe { transmute(vclzq_s32(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(vclzq_s32(transmute(a))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] @@ -7554,35 +7611,8 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { unsafe { 
transmute(vclz_s8(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vclz_s8(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] @@ -7601,37 +7631,6 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { unsafe { transmute(vclzq_s8(transmute(a))) } } -#[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - 
stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(vclzq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } -} #[doc = "Population count per byte."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_s8)"] #[inline] @@ -7677,7 +7676,6 @@ pub fn vcntq_s8(a: int8x16_t) -> int8x16_t { #[doc = "Population count per byte."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7697,9 +7695,8 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { unsafe { transmute(vcnt_s8(transmute(a))) } } #[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7715,17 +7712,12 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vcnt_s8(transmute(a))); 
- simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { transmute(vcntq_s8(transmute(a))) } } #[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7741,13 +7733,12 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { transmute(vcntq_s8(transmute(a))) } +pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { transmute(vcnt_s8(transmute(a))) } } #[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7763,77 +7754,61 @@ pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(vcntq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { 
transmute(vcntq_s8(transmute(a))) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { transmute(vcnt_s8(transmute(a))) } +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable 
= "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vcnt_s8(transmute(a))); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -7842,20 +7817,16 @@ pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { transmute(vcntq_s8(transmute(a))) } +pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { + unsafe { 
simd_shuffle!(a, b, [0, 1, 2, 3]) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -7864,40 +7835,36 @@ pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { +pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(vcntq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(nop))] -pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7909,12 +7876,23 @@ pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } +pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 
6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7926,12 +7904,13 @@ pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } #[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7944,11 +7923,17 @@ pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7964,8 +7949,32 @@ pub fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } } #[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7981,8 +7990,9 @@ pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { unsafe { simd_shuffle!(a, b, [0, 1]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7994,12 +8004,16 @@ pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { + unsafe { + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8011,12 +8025,13 @@ pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8028,12 +8043,23 @@ pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } +pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8045,12 +8071,13 @@ pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 1]) } +pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { + 
unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8062,12 +8089,18 @@ pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8079,12 +8112,13 @@ pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { - unsafe { 
simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8096,71 +8130,60 @@ pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [0, 1]) } +pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( 
not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vcreate_f16(a: u64) -> float16x4_t { - unsafe { transmute(a) } +pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vcreate_f16(a: u64) -> float16x4_t { +pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] +#[doc = 
"Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8169,20 +8192,16 @@ pub fn vcreate_f16(a: u64) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_f32(a: u64) -> float32x2_t { - unsafe { transmute(a) } +pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8191,23 +8210,26 @@ pub fn vcreate_f32(a: u64) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_f32(a: u64) -> float32x2_t { +pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { unsafe { - let 
ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8216,20 +8238,16 @@ pub fn vcreate_f32(a: u64) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s8(a: u64) -> int8x8_t { - unsafe { transmute(a) } +pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8238,23 +8256,21 @@ pub fn vcreate_s8(a: u64) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s8(a: u64) -> int8x8_t { +pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { unsafe { - let ret_val: int8x8_t = transmute(a); + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8263,20 +8279,16 @@ pub fn vcreate_s8(a: u64) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s16(a: u64) -> int16x4_t { - unsafe { transmute(a) } +pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } } -#[doc = "Insert vector element from 
another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8285,16 +8297,15 @@ pub fn vcreate_s16(a: u64) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s16(a: u64) -> int16x4_t { +pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8302,21 +8313,22 @@ pub fn vcreate_s16(a: u64) -> int16x4_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop) )] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") 
+ stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s32(a: u64) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vcreate_f16(a: u64) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8332,14 +8344,11 @@ pub fn vcreate_s32(a: u64) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s32(a: u64) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vcreate_f32(a: u64) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -8356,13 +8365,12 @@ pub fn vcreate_s32(a: u64) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s64(a: u64) -> int64x1_t { +pub fn vcreate_s8(a: u64) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8378,13 +8386,12 @@ pub fn vcreate_s64(a: u64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u8(a: u64) -> uint8x8_t { +pub fn vcreate_s16(a: u64) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8400,16 +8407,12 @@ pub fn vcreate_u8(a: u64) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u8(a: u64) -> uint8x8_t { - unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vcreate_s32(a: u64) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8425,13 +8428,12 @@ pub fn vcreate_u8(a: u64) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u16(a: u64) -> uint16x4_t { +pub fn vcreate_s64(a: u64) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8447,16 +8449,12 @@ pub fn vcreate_u16(a: u64) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u16(a: u64) -> uint16x4_t { - unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vcreate_u8(a: u64) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8472,13 +8470,12 @@ pub fn vcreate_u16(a: u64) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u32(a: u64) -> uint32x2_t { +pub fn vcreate_u16(a: u64) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector 
element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8495,10 +8492,7 @@ pub fn vcreate_u32(a: u64) -> uint32x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vcreate_u32(a: u64) -> uint32x2_t { - unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64)"] @@ -8524,7 +8518,6 @@ pub fn vcreate_u64(a: u64) -> uint64x1_t { #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8544,34 +8537,8 @@ pub fn vcreate_p8(a: u64) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vcreate_p8(a: u64) -> poly8x8_t { - unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8591,31 +8558,6 @@ pub fn vcreate_p16(a: u64) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vcreate_p16(a: u64) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64)"] #[inline] #[target_feature(enable = "neon,aes")] @@ -9933,6 +9875,7 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -9956,8 +9899,39 @@ pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -9981,8 +9955,39 @@ pub fn 
vdupq_lane_f16(a: float16x4_t) -> float16x8_t { unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10004,8 +10009,37 @@ pub fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10027,8 +10061,37 @@ pub fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10050,8 +10113,37 @@ pub fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10073,8 +10165,37 @@ pub fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10096,8 +10217,37 @@ pub fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { unsafe { 
simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10119,8 +10269,37 @@ pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10142,8 +10321,37 @@ pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { + 
static_assert_uimm_bits!(N, 2); + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10165,8 +10373,37 @@ pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10188,8 +10425,9 @@ pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10206,13 +10444,18 @@ pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { +pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10229,13 +10472,14 @@ pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vdupq_lane_s16(a: int16x4_t) -> int16x8_t { +pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { static_assert_uimm_bits!(N, 2); unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10252,19 +10496,24 @@ pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { +pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ 
-10275,19 +10524,20 @@ pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); +pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(N, 2); unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10298,19 +10548,24 @@ pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10321,19 +10576,20 @@ pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); +pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 2); unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10344,13 +10600,18 @@ pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { 
simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] @@ -10367,13 +10628,14 @@ pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { +pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] @@ -10390,19 +10652,24 @@ pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { +pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { 
static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10413,19 +10680,20 @@ pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { - static_assert!(N == 0); - a +pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10436,69 +10704,76 @@ pub fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { - static_assert!(N == 0); - a +pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { +pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } + unsafe 
{ simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { +pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10509,19 +10784,20 @@ pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10532,19 +10808,28 @@ pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: 
poly8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10555,19 +10840,20 @@ pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10578,19 +10864,28 @@ pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10601,19 +10896,20 @@ pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { 
simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10624,19 +10920,27 @@ pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(nop, N = 0) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10647,19 +10951,19 @@ pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } +pub fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N == 0); + a } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(nop, N = 0) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10670,13 +10974,14 @@ pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } +pub fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N == 0); + a } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] 
#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] @@ -10685,21 +10990,24 @@ pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { static_assert_uimm_bits!(N, 3); unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] @@ -10708,21 +11016,28 @@ pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn 
vdup_laneq_f16(a: float16x8_t) -> float16x4_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] @@ -10731,21 +11046,24 @@ pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { static_assert_uimm_bits!(N, 3); unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vdup.16", N = 4))] @@ -10754,27 +11072,34 @@ pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10785,19 +11110,20 @@ pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, 
[N as u32; 8]) } +pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10808,19 +11134,24 @@ pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N 
= 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10831,19 +11162,20 @@ pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10854,19 +11186,24 @@ pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + 
simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10877,19 +11214,20 @@ pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 2) )] 
#[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10900,19 +11238,24 @@ pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 1) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10923,19 +11266,20 @@ pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { - static_assert_uimm_bits!(N, 1); - unsafe { transmute(vgetq_lane_s64::(a)) } +pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 1) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10946,52 +11290,50 @@ pub fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 1); - unsafe { transmute(vgetq_lane_u64::(a)) } +pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Create a new vector with all lanes set to a value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vdup_n_f16(a: f16) -> float16x4_t { - float16x4_t::splat(a) -} -#[doc = "Create a new vector with all lanes set to a value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[rustc_legacy_const_generics(1)] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vdupq_n_f16(a: f16) -> float16x8_t { - float16x8_t::splat(a) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11000,19 +11342,26 @@ pub fn vdupq_n_f16(a: f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_f32(value: f32) -> float32x2_t { - float32x2_t::splat(value) +pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11021,19 +11370,22 @@ pub fn vdup_n_f32(value: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_p16(value: p16) -> poly16x4_t { - poly16x4_t::splat(value) +pub fn vdupq_laneq_u32(a: uint32x4_t) -> 
uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11042,19 +11394,26 @@ pub fn vdup_n_p16(value: p16) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_p8(value: p8) -> poly8x8_t { - poly8x8_t::splat(value) +pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11063,19 +11422,22 @@ pub fn vdup_n_p8(value: p8) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s16(value: i16) -> int16x4_t { - int16x4_t::splat(value) +pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11084,19 +11446,26 @@ pub fn vdup_n_s16(value: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s32(value: i32) -> int32x2_t { - int32x2_t::splat(value) +pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { + 
static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11105,19 +11474,22 @@ pub fn vdup_n_s32(value: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s64(value: i64) -> int64x1_t { - int64x1_t::splat(value) +pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11126,19 +11498,26 @@ pub fn vdup_n_s64(value: i64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s8(value: i8) -> int8x8_t { - int8x8_t::splat(value) +pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11147,19 +11526,22 @@ pub fn vdup_n_s8(value: i8) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u16(value: u16) -> uint16x4_t { - uint16x4_t::splat(value) 
+pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11168,19 +11550,26 @@ pub fn vdup_n_u16(value: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u32(value: u32) -> uint32x2_t { - uint32x2_t::splat(value) +pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11189,19 +11578,22 @@ pub fn vdup_n_u32(value: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u64(value: u64) -> uint64x1_t { - uint64x1_t::splat(value) +pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11210,19 +11602,26 @@ pub fn vdup_n_u64(value: u64) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u8(value: u8) -> uint8x8_t { - uint8x8_t::splat(value) +pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { + 
static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11231,19 +11630,22 @@ pub fn vdup_n_u8(value: u8) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_f32(value: f32) -> float32x4_t { - float32x4_t::splat(value) +pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11252,19 +11654,26 @@ pub fn vdupq_n_f32(value: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_p16(value: p16) -> poly16x8_t { - poly16x8_t::splat(value) +pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11273,19 +11682,22 @@ pub fn vdupq_n_p16(value: p16) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vdupq_n_p8(value: p8) -> poly8x16_t { - poly8x16_t::splat(value) +pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11294,19 +11706,26 @@ pub fn vdupq_n_p8(value: p8) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s16(value: i16) -> int16x8_t { - int16x8_t::splat(value) +pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] #[inline] +#[cfg(target_endian = 
"little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11315,19 +11734,22 @@ pub fn vdupq_n_s16(value: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s32(value: i32) -> int32x4_t { - int32x4_t::splat(value) +pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11336,19 +11758,27 @@ pub fn vdupq_n_s32(value: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s64(value: i64) -> int64x2_t { - int64x2_t::splat(value) 
+pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11357,19 +11787,22 @@ pub fn vdupq_n_s64(value: i64) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s8(value: i8) -> int8x16_t { - int8x16_t::splat(value) +pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11378,19 +11811,27 @@ pub fn vdupq_n_s8(value: i8) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u16(value: u16) -> uint16x8_t { - uint16x8_t::splat(value) +pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11399,19 +11840,22 @@ pub fn vdupq_n_u16(value: u16) -> uint16x8_t { target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u32(value: u32) -> uint32x4_t { - uint32x4_t::splat(value) +pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11420,19 +11864,27 @@ pub fn vdupq_n_u32(value: u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u64(value: u64) -> uint64x2_t { - uint64x2_t::splat(value) +pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11441,19 +11893,22 @@ pub fn vdupq_n_u64(value: u64) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u8(value: u8) -> uint8x16_t { - uint8x16_t::splat(value) +pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32_vfp4)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11462,19 +11917,31 
@@ pub fn vdupq_n_u8(value: u8) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { - float32x2_t::splat(value) +pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32_vfp4)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11483,18 +11950,20 @@ fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { - float32x4_t::splat(value) +pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 0) + assert_instr(dup, N = 8) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11505,19 +11974,29 @@ fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { - static_assert!(N == 0); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(dup, N = 0) + assert_instr(dup, N = 8) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11528,19 +12007,20 @@ pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { - static_assert!(N == 0); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 1) + assert_instr(dup, N = 8) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11551,19 +12031,28 @@ pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } 
#[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 1) + assert_instr(nop, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11574,20 +12063,21 @@ pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { +pub fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } + unsafe { transmute(vgetq_lane_s64::(a)) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(nop, N = 1) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11596,18 +12086,51 @@ pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_xor(a, b) } +pub fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 1); + unsafe { transmute(vgetq_lane_u64::(a)) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"] +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdup_n_f16(a: f16) -> float16x4_t { + float16x4_t::splat(a) +} +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdupq_n_f16(a: f16) -> float16x8_t { + float16x8_t::splat(a) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11617,18 +12140,18 @@ pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_f32(value: f32) -> float32x2_t { + float32x2_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11638,18 +12161,18 @@ pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_p16(value: p16) -> poly16x4_t { + poly16x4_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11659,18 +12182,18 @@ pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_p8(value: p8) -> poly8x8_t { + poly8x8_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11680,18 +12203,18 @@ pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { 
simd_xor(a, b) } +pub fn vdup_n_s16(value: i16) -> int16x4_t { + int16x4_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11701,18 +12224,18 @@ pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_s32(value: i32) -> int32x2_t { + int32x2_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(fmov) )] #[cfg_attr( not(target_arch = "arm"), @@ -11722,18 +12245,18 @@ pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_s64(value: i64) -> int64x1_t { + int64x1_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11743,18 +12266,18 @@ pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_s8(value: i8) -> int8x8_t { + int8x8_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + 
assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11764,18 +12287,18 @@ pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_u16(value: u16) -> uint16x4_t { + uint16x4_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11785,18 +12308,18 @@ pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_u32(value: u32) -> uint32x2_t { + uint32x2_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(fmov) )] #[cfg_attr( not(target_arch = "arm"), @@ -11806,18 +12329,18 @@ pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_u64(value: u64) -> uint64x1_t { + uint64x1_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11827,18 +12350,18 @@ pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_u8(value: u8) -> uint8x8_t { + uint8x8_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32)"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11848,18 +12371,18 @@ pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_n_f32(value: f32) -> float32x4_t { + float32x4_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11869,18 +12392,18 @@ pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_n_p16(value: p16) -> poly16x8_t { + poly16x8_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"] +#[doc = 
"Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11890,18 +12413,18 @@ pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_n_p8(value: p8) -> poly8x16_t { + poly8x16_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -11911,44 +12434,40 @@ pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_n_s16(value: i16) -> int16x8_t { + int16x8_t::splat(value) } 
-#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s32)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } +pub fn vdupq_n_s32(value: i32) -> int32x4_t { + int32x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11957,21 +12476,19 @@ pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } +pub fn vdupq_n_s64(value: i64) -> int64x2_t { + int64x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11980,21 +12497,19 @@ pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } +pub fn vdupq_n_s8(value: i8) -> int8x16_t { + int8x16_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12003,23 +12518,19 @@ pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } +pub fn vdupq_n_u16(value: u16) -> uint16x8_t { + uint16x8_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] 
#[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12028,23 +12539,19 @@ pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t { - static_assert!(N == 0); - a +pub fn vdupq_n_u32(value: u32) -> uint32x4_t { + uint32x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12053,21 +12560,19 @@ pub unsafe fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t { - static_assert!(N == 0); - a +pub fn vdupq_n_u64(value: u64) -> uint64x2_t { + uint64x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12076,36 +12581,19 @@ pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7 - ] - ) - } +pub fn vdupq_n_u8(value: u8) -> uint8x16_t { + uint8x16_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32_vfp4)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12114,36 +12602,19 @@ pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7 - ] - ) - } +fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { + float32x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32_vfp4)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12152,36 +12623,21 @@ pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7 - ] - ) - } +fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { + 
float32x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12190,36 +12646,22 @@ pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7 - ] - ) - } +pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { + static_assert!(N == 0); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12228,36 +12670,25 @@ pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); +pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { + static_assert!(N == 0); unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7 - ] - ) + let ret_val: int64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0") @@ -12266,75 +12697,49 @@ pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7 - ] - ) - } +pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { + static_assert!(N == 0); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - static_assert_uimm_bits!(N, 3); +pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { + static_assert!(N == 0); unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as 
u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7 - ] - ) + let ret_val: uint64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup, N = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12343,21 +12748,22 @@ pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } +pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] 
#[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup, N = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12366,21 +12772,26 @@ pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } +pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup, N = 1) )] 
-#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12389,21 +12800,22 @@ pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } +pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup, N = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12412,21 +12824,24 @@ pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } +pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { 
+ static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(eor) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12435,21 +12850,19 @@ pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } +pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(eor) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12458,21 +12871,19 @@ pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } +pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_xor(a, b) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(eor) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12481,21 +12892,19 @@ pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } +pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + 
unsafe { simd_xor(a, b) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(eor) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12504,21 +12913,19 @@ pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } +pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 15) + assert_instr(eor) )] 
-#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12527,44 +12934,19 @@ pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7, - N as u32 + 8, - N as u32 + 9, - N as u32 + 10, - N as u32 + 11, - N as u32 + 12, - N as u32 + 13, - N as u32 + 14, - N as u32 + 15 - ] - ) - } +pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 15) + assert_instr(eor) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12573,44 +12955,19 @@ pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as 
u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7, - N as u32 + 8, - N as u32 + 9, - N as u32 + 10, - N as u32 + 11, - N as u32 + 12, - N as u32 + 13, - N as u32 + 14, - N as u32 + 15 - ] - ) - } +pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_xor(a, b) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 15) + assert_instr(eor) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12619,86 +12976,60 @@ pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - b, - [ - N as u32, - N as u32 + 1, - N as u32 + 2, - N as u32 + 3, - N as u32 + 4, - N as u32 + 5, - N as u32 + 6, - N as u32 + 7, - N as u32 + 8, - N as u32 + 9, - N as u32 + 10, - N as u32 + 11, - N as u32 + 12, - N as u32 + 13, - N as u32 + 14, - N as u32 + 15 - ] - ) - } +pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"] 
+#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { - unsafe { simd_fma(b, c, a) } +pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - 
stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { - unsafe { simd_fma(b, c, a) } +pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -12708,18 +13039,18 @@ pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { simd_fma(b, c, a) } +pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -12729,18 +13060,18 @@ pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { simd_fma(b, c, a) } +pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -12750,18 +13081,18 @@ pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfma_n_f32(a: 
float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfma_f32(a, b, vdup_n_f32_vfp4(c)) +pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -12771,70 +13102,60 @@ pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) +pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { - unsafe { - let b: float16x4_t = simd_neg(b); - vfma_f16(a, b, c) - } +pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = 
"arm64ec"))] -pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { - unsafe { - let b: float16x8_t = simd_neg(b); - vfmaq_f16(a, b, c) - } +pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -12844,67 +13165,77 @@ pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { - let b: float32x2_t = simd_neg(b); - vfma_f32(a, b, c) - } +pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vfms))] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(ext, N = 3) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { - let b: float32x4_t = simd_neg(b); - vfmaq_f32(a, b, c) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(ext, N = 3) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = 
"arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfms_f32(a, b, vdup_n_f32_vfp4(c)) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(ext, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12913,57 +13244,75 @@ pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - 
vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) +pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(nop))] -pub fn vget_high_f16(a: float16x8_t) -> float16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(nop))] -pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ 
-12972,19 +13321,27 @@ pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { - unsafe { simd_shuffle!(a, a, [2, 3]) } +pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12993,19 +13350,22 @@ pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13014,19 +13374,28 @@ pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(nop, N = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13035,19 +13404,23 @@ pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +pub unsafe fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t { + static_assert!(N == 0); + a } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(nop, N = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13056,19 +13429,22 @@ pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { - unsafe { simd_shuffle!(a, a, [2, 3]) } +pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t { + static_assert!(N == 0); + a } -#[doc = "Duplicate vector element to vector or 
scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 7) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13077,19 +13453,37 @@ pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 7) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13098,19 +13492,40 @@ pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 7) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13119,19 +13534,37 @@ pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, a, [2, 3]) } +pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 7) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13140,19 +13573,40 @@ pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 
6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 7) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13161,19 +13615,37 @@ pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { - unsafe { transmute(u64x1::new(simd_extract!(a, 1))) } +pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 7) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13182,19 +13654,40 @@ pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { - unsafe { int64x1_t([simd_extract!(a, 1)]) } +pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 7) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0") @@ -13203,52 +13696,37 @@ pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { - unsafe { uint64x1_t([simd_extract!(a, 1)]) } -} -#[doc = "Duplicate vector element to scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vget_lane_f16(a: float16x4_t) -> f16 { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_extract!(a, LANE as u32) } +pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Duplicate vector element to scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(ext, N = 7) )] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_extract!(a, LANE as u32) } -} -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13257,36 +13735,40 @@ pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_f32(v: float32x2_t) -> f32 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) )] -pub fn vget_lane_p16(v: poly16x4_t) -> p16 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } -} -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13295,17 +13777,37 @@ pub fn vget_lane_p16(v: poly16x4_t) -> p16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_p8(v: poly8x8_t) -> p8 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13314,17 +13816,40 @@ pub fn vget_lane_p8(v: poly8x8_t) -> p8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s16(v: int16x4_t) -> i16 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13333,17 +13858,37 @@ pub fn vget_lane_s16(v: int16x4_t) -> i16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s32(v: int32x2_t) -> i32 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13352,93 +13897,123 @@ pub fn vget_lane_s32(v: int32x2_t) -> i32 { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s8(v: int8x8_t) -> i8 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } -} -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vget_lane_u16(v: uint16x4_t) -> u16 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] -#[cfg_attr( - 
not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) )] -pub fn vget_lane_u32(v: uint32x2_t) -> u32 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } -} -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_u8(v: uint8x8_t) -> u8 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] #[inline] 
-#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13447,17 +14022,22 @@ pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13466,17 +14046,28 @@ pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> 
float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13485,17 +14076,22 @@ pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { - static_assert_uimm_bits!(IMM5, 4); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13504,17 +14100,28 @@ pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13523,17 +14130,22 @@ pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13542,17 +14154,28 @@ pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: 
int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13561,17 +14184,22 @@ pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { - static_assert_uimm_bits!(IMM5, 4); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13580,17 +14208,28 @@ pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13599,17 
+14238,22 @@ pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13618,17 +14262,28 @@ pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 
2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13637,17 +14292,22 @@ pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { - static_assert_uimm_bits!(IMM5, 4); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 
3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13656,17 +14316,28 @@ pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_p64(v: poly64x1_t) -> p64 { - static_assert!(IMM5 == 0); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13675,17 +14346,22 @@ pub fn vget_lane_p64(v: poly64x1_t) -> p64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s64(v: int64x1_t) -> i64 { - 
static_assert!(IMM5 == 0); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13694,16 +14370,27 @@ pub fn vget_lane_s64(v: int64x1_t) -> i64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_u64(v: uint64x1_t) -> u64 { - static_assert!(IMM5 == 0); - unsafe { simd_extract!(v, 0) } +pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13712,15 +14399,22 @@ pub fn vget_lane_u64(v: uint64x1_t) -> u64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { - unsafe { simd_shuffle!(a, a, [0, 1]) } +pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13729,15 +14423,27 @@ pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13746,15 +14452,45 @@ pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ) + } } -#[doc = "Duplicate vector element to vector or 
scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13763,32 +14499,54 @@ pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } -} -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { - unsafe { simd_shuffle!(a, a, [0, 1]) } +pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: 
int8x16_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13797,15 +14555,45 @@ pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13814,15 +14602,54 @@ pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13831,15 +14658,45 @@ pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, a, [0, 1]) } +pub fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] 
+#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13848,49 +14705,96 @@ pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"] +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = 
"neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { - unsafe { transmute(u64x1::new(simd_extract!(a, 0))) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"] +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { - unsafe { int64x1_t([simd_extract!(a, 0)]) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13899,18 +14803,18 @@ pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { - unsafe { uint64x1_t([simd_extract!(a, 0)]) } +pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(fmla) )] #[cfg_attr( not(target_arch = "arm"), @@ -13920,26 +14824,18 @@ pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe 
extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] - fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vhadd_s8(a, b) } +pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(fmla) )] #[cfg_attr( not(target_arch = "arm"), @@ -13949,26 +14845,18 @@ pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] - fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vhaddq_s8(a, b) } +pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfma_f32(a, b, vdup_n_f32_vfp4(c)) } -#[doc = "Halving add"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(fmla) )] #[cfg_attr( not(target_arch = "arm"), @@ -13978,84 +14866,70 @@ pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] - fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vhadd_s16(a, b) } +pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(fmls) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")] - fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe { + let b: float16x4_t = simd_neg(b); + vfma_f16(a, b, c) } - unsafe { _vhaddq_s16(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(fmls) )] +#[target_feature(enable = "neon,fp16")] 
#[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] - fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe { + let b: float16x8_t = simd_neg(b); + vfmaq_f16(a, b, c) } - unsafe { _vhadd_s32(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14065,26 +14939,21 @@ pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.shadd.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] - fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; +pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { + let b: float32x2_t = simd_neg(b); + vfma_f32(a, b, c) } - unsafe { _vhaddq_s32(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14094,26 +14963,21 @@ pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] - fn _vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; +pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { + let b: float32x4_t = simd_neg(b); + vfmaq_f32(a, b, c) } - unsafe { _vhadd_u8(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"] 
+#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14123,26 +14987,18 @@ pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] - fn _vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } - unsafe { _vhaddq_u8(a, b) } +pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfms_f32(a, b, vdup_n_f32_vfp4(c)) } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"] +#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14152,113 +15008,107 @@ pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] - fn _vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vhadd_u16(a, b) } +pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"] +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.uhadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] - fn _vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } - unsafe { _vhaddq_u16(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_high_f16(a: float16x8_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"] +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] - fn _vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_high_f16(a: float16x8_t) -> float16x4_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, 
ret_val, [3, 2, 1, 0]) } - unsafe { _vhadd_u32(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"] +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v4i32" - )] - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] - fn _vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vhaddq_u32(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14268,26 +15118,19 @@ pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] - fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vhsub_s16(a, b) } +pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14297,26 +15140,23 @@ pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] - fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; +pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [2, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vhsubq_s16(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14326,26 +15166,19 @@ pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] - fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vhsub_s32(a, b) } +pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14355,26 +15188,23 @@ pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = 
"aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] - fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; +pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vhsubq_s32(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14384,26 +15214,19 @@ pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] - fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vhsub_s8(a, b) } +pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14413,26 +15236,24 @@ pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] - fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; +pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vhsubq_s8(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14442,26 +15263,19 @@ pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] - fn _vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vhsub_u8(a, b) } +pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14471,26 +15285,23 @@ pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - 
unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] - fn _vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; +pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vhsubq_u8(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14500,26 +15311,19 @@ pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] - fn _vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vhsub_u16(a, b) } +pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } } -#[doc 
= "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14529,26 +15333,23 @@ pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] - fn _vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; +pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [2, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vhsubq_u16(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14558,26 +15359,19 @@ pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] - fn _vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vhsub_u32(a, b) } +pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14587,66 +15381,46 @@ pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_u32(a: uint32x4_t, b: 
uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] - fn _vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; +pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vhsubq_u32(a, b) } } -#[doc = "Load one single-element structure and replicate to all lanes of one register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t { - let x = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0))); - vdup_lane_f16::<0>(x) -} -#[doc = "Load one single-element structure and replicate to all lanes of one register"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { - let x = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0))); - vdupq_laneq_f16::<0>(x) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14656,20 +15430,23 @@ pub unsafe fn vld1q_dup_f16(ptr: 
*const f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { - transmute(f32x2::splat(*ptr)) +pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14679,20 +15456,19 @@ pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { - transmute(u16x4::splat(*ptr)) +pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate 
vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14702,20 +15478,23 @@ pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { - transmute(u8x8::splat(*ptr)) +pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [2, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14725,20 +15504,19 @@ pub unsafe 
fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { - transmute(i16x4::splat(*ptr)) +pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14748,20 +15526,24 @@ pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { - transmute(i32x2::splat(*ptr)) +pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14771,20 +15553,19 @@ pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { - transmute(i8x8::splat(*ptr)) +pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { transmute(u64x1::new(simd_extract!(a, 1))) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( 
not(target_arch = "arm"), @@ -14794,20 +15575,22 @@ pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { - transmute(u16x4::splat(*ptr)) +pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + transmute(u64x1::new(simd_extract!(a, 1))) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14817,20 +15600,19 @@ pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { - transmute(u32x2::splat(*ptr)) +pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { + unsafe { int64x1_t([simd_extract!(a, 1)]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate 
vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14840,20 +15622,22 @@ pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { - transmute(u8x8::splat(*ptr)) +pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + int64x1_t([simd_extract!(a, 1)]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14863,20 +15647,19 @@ pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { - transmute(f32x4::splat(*ptr)) +pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { uint64x1_t([simd_extract!(a, 1)]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14886,44 +15669,102 @@ pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { - transmute(u16x8::splat(*ptr)) +pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + uint64x1_t([simd_extract!(a, 1)]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vget_lane_f16(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_extract!(a, LANE as u32) } +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vget_lane_f16(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_extract!(a, LANE as u32) + } +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] -pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { - transmute(u8x16::splat(*ptr)) +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_extract!(a, LANE as u32) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + unsafe { + let a: 
float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(a, LANE as u32) + } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14932,21 +15773,18 @@ pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { - transmute(i16x8::splat(*ptr)) +pub fn vget_lane_f32(v: float32x2_t) -> f32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14955,21 +15793,21 @@ pub unsafe fn vld1q_dup_s16(ptr: *const 
i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { - transmute(i32x4::splat(*ptr)) +pub fn vget_lane_f32(v: float32x2_t) -> f32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: float32x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14978,21 +15816,18 @@ pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { - transmute(i64x2::splat(*ptr)) +pub fn vget_lane_p16(v: poly16x4_t) -> p16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc 
= "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15001,21 +15836,21 @@ pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { - transmute(i8x16::splat(*ptr)) +pub fn vget_lane_p16(v: poly16x4_t) -> p16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: poly16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = 
"arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15024,21 +15859,18 @@ pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { - transmute(u16x8::splat(*ptr)) +pub fn vget_lane_p8(v: poly8x8_t) -> p8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15047,21 +15879,21 @@ pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { - transmute(u32x4::splat(*ptr)) +pub fn vget_lane_p8(v: poly8x8_t) -> p8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: poly8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15070,21 +15902,18 @@ pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { - transmute(u64x2::splat(*ptr)) +pub fn vget_lane_s16(v: int16x4_t) -> i16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) -)] 
+#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15093,21 +15922,21 @@ pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { - transmute(u8x16::splat(*ptr)) +pub fn vget_lane_s16(v: int16x4_t) -> i16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: int16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"] #[inline] -#[target_feature(enable = "neon,aes")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15116,30 +15945,18 @@ pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { - let x: poly64x1_t; - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - { - x = crate::core_arch::aarch64::vld1_p64(ptr); - } - 
#[cfg(target_arch = "arm")] - { - x = crate::core_arch::arm::vld1_p64(ptr); - }; - x +pub fn vget_lane_s32(v: int32x2_t) -> i32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15148,30 +15965,21 @@ pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { - let x: int64x1_t; - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - { - x = crate::core_arch::aarch64::vld1_s64(ptr); +pub fn vget_lane_s32(v: int32x2_t) -> i32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: int32x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) } - #[cfg(target_arch = "arm")] - { - x = crate::core_arch::arm::vld1_s64(ptr); - }; - x } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"] -#[doc = 
"## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15180,407 +15988,127 @@ pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { - let x: uint64x1_t; - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - { - x = crate::core_arch::aarch64::vld1_u64(ptr); - } - #[cfg(target_arch = "arm")] - { - x = crate::core_arch::arm::vld1_u64(ptr); - }; - x -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { - transmute(vld1_v4f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four 
registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { - let ret_val: float16x4_t = transmute(vld1_v4f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { - transmute(vld1q_v8f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) +pub fn vget_lane_s8(v: int8x8_t) -> i8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"] #[inline] #[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { - let ret_val: float16x8_t = transmute(vld1q_v8f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s8(v: int8x8_t) -> i8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: int8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, 
three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u16(v: uint16x4_t) -> u16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u16(v: uint16x4_t) -> u16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: uint16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t 
{ - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_lane_u32(v: uint32x2_t) -> u32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] 
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v2f32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v4f32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v8i8::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v16i8::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v4i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_u16(ptr: *const u16) -> 
uint16x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v8i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v2i32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v4i32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - 
transmute(vld1_v1i64::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v2i64::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v8i8::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v16i8::(ptr as *const i8)) -} -#[doc = "Load multiple 
single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v4i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v8i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,aes")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v2i64::(ptr as *const i8)) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vget_lane_u32(v: uint32x2_t) -> u32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: uint32x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15589,21 +16117,18 @@ pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_lane_u8(v: uint8x8_t) -> u8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"] #[inline] +#[cfg(target_endian = 
"big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15612,21 +16137,21 @@ pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_lane_u8(v: uint8x8_t) -> u8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: uint8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15635,21 +16160,18 @@ pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15658,21 +16180,21 @@ pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: float32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose 
register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15681,21 +16203,18 @@ pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15704,62 +16223,41 @@ pub unsafe fn 
vld1q_f32_x3(a: *const f32) -> float32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: poly16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 0) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_lane_f16(ptr: *const f16, src: float16x4_t) -> float16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) -} -#[doc = "Load one single-element structure to one lane of one register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] 
-#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 0) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) -> float16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15768,23 +16266,21 @@ pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: poly64x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15793,23 +16289,18 @@ pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15818,23 +16309,22 @@ pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { + let v: poly8x16_t = + simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15843,23 +16333,18 @@ pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> p target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15868,23 +16353,21 @@ pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: int16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr, LANE = 0) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15893,23 +16376,18 @@ pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t { - static_assert!(LANE == 0); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15918,23 +16396,21 @@ pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: int32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15943,23 +16419,18 @@ pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> in target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15968,23 +16439,21 @@ pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) - target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: int64x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr, LANE = 0) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15993,23 +16462,18 @@ pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t { - static_assert!(LANE == 0); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16018,23 +16482,22 @@ pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { + let v: int8x16_t = + simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16043,23 +16506,18 @@ pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> u target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16068,23 +16526,21 @@ pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: uint16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 15) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16093,23 +16549,18 @@ pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16118,23 +16569,21 @@ pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: uint32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16143,23 +16592,18 @@ pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16168,23 +16612,21 @@ pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) - target_arch = "arm", unstable(feature =
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: uint64x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 15) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16193,23 +16635,18 @@ pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16218,23 +16655,21 @@ pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { + let v: uint8x16_t = + simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16243,23 +16678,17 @@ pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vget_lane_p64(v: poly64x1_t) -> p64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16268,23 +16697,17 @@ pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn 
vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vget_lane_s64(v: int64x1_t) -> i64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 15) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16293,23 +16716,17 @@ pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(src, LANE as u32, *ptr) +pub fn vget_lane_u64(v: uint64x1_t) -> u64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, 0) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"] #[inline] -#[target_feature(enable = "neon,aes")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr, LANE = 0) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16318,23 +16735,16 @@ pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) -> poly64x1_t { - static_assert!(LANE == 0); - simd_insert!(src, LANE as u32, *ptr) +pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"] #[inline] -#[target_feature(enable = "neon,aes")] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0") @@ -16343,40 +16753,20 @@ pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) -> poly64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,aes")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t { - let a: *const i8 = ptr as *const i8; - let b: i32 = crate::mem::align_of::() as i32; - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] - fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; +pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - transmute(_vld1_v1i64(a, b)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16385,21 +16775,16 @@ pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16408,21 +16793,20 @@ pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p64_x3(a: *const p64) -> 
poly64x1x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16431,21 +16815,16 @@ pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16454,21 +16833,21 @@ pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16477,21 +16856,16 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16500,125 +16874,20 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v8i8::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v16i8::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v4i16::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"] -#[doc = "## Safety"] 
-#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v8i16::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v2i32::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v4i32::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = 
"neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v1i64::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v2i64::(ptr as *const i8) +pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16627,21 +16896,16 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16650,21 +16914,20 @@ pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"] -#[doc = "## 
Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16673,21 +16936,16 @@ pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16696,21 +16954,21 @@ pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16719,21 +16977,16 @@ pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic 
unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16742,21 +16995,20 @@ pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0") @@ -16765,21 +17017,16 @@ pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16788,21 +17035,20 @@ pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"] -#[doc = "## 
Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16811,21 +17057,16 @@ pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16834,21 +17075,21 @@ pub unsafe fn vld1_s16_x4(a: *const i16) -> 
int16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16857,21 +17098,16 @@ pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { transmute(u64x1::new(simd_extract!(a, 0))) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"] -#[doc = "## Safety"] 
-#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16880,21 +17116,19 @@ pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + transmute(u64x1::new(simd_extract!(a, 0))) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16903,21 +17137,16 @@ pub 
unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { + unsafe { int64x1_t([simd_extract!(a, 0)]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16926,21 +17155,19 @@ pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + int64x1_t([simd_extract!(a, 0)]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16949,21 +17176,16 @@ pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { uint64x1_t([simd_extract!(a, 0)]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16972,20 +17194,21 @@ pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s32_x2(a: 
*const i32) -> int32x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + uint64x1_t([simd_extract!(a, 0)]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -16995,20 +17218,26 @@ pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] + fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vhadd_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17018,20 +17247,26 @@ pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] + fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vhaddq_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17041,20 +17276,26 @@ pub 
unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] + fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vhadd_s16(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17064,20 +17305,26 @@ pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vhadds.v8i16")] + fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vhaddq_s16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17087,20 +17334,26 @@ pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] + fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vhadd_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"] 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17110,20 +17363,26 @@ pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] + fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vhaddq_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17133,20 +17392,26 @@ pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] + fn _vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vhadd_u8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17156,20 +17421,26 @@ pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] + fn _vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vhaddq_u8(a, b) 
} } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17179,20 +17450,26 @@ pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] + fn _vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vhadd_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17202,20 +17479,26 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] + fn _vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vhaddq_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17225,20 +17508,26 @@ pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { - 
crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] + fn _vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vhadd_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17248,20 +17537,26 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] + fn _vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vhaddq_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17271,20 +17566,26 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] + fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vhsub_s16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr("vhsub.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17294,20 +17595,26 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] + fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vhsubq_s16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17317,20 +17624,26 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) 
-> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vhsub_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17340,20 +17653,26 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vhsubq_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17363,20 +17682,26 @@ pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] + fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vhsub_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vhsub.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17386,20 +17711,26 @@ pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] + fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vhsubq_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17409,20 +17740,26 @@ pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> 
uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] + fn _vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vhsub_u8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17432,20 +17769,26 @@ pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] + fn _vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vhsubq_u8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17455,20 +17798,26 @@ pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] + fn _vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vhsub_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr("vhsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17478,20 +17827,26 @@ pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] + fn _vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vhsubq_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17501,20 +17856,26 @@ pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_u32(a: uint32x2_t, b: 
uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] + fn _vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vhsub_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17524,43 +17885,66 @@ pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] + fn _vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vhsubq_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(ld1r) )] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t { + let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) )] -pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { + let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17570,20 +17954,20 @@ pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { + transmute(f32x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17593,20 +17977,20 @@ pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { + transmute(u16x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17616,20 +18000,20 @@ pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { + transmute(u8x8::splat(*ptr)) 
} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17639,20 +18023,20 @@ pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { + transmute(i16x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17662,20 +18046,20 @@ pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { + transmute(i32x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17685,20 +18069,20 @@ pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { + transmute(i8x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] +#[doc = "Load one single-element structure and Replicate to all 
lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17708,20 +18092,20 @@ pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { + transmute(u16x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17731,20 +18115,20 @@ pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { + transmute(u32x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17754,20 +18138,20 @@ pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { + transmute(u8x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17777,20 +18161,20 @@ pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { + transmute(f32x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17800,20 +18184,20 @@ pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { + transmute(u16x8::splat(*ptr)) } -#[doc = "Load multiple 
single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17823,20 +18207,20 @@ pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { + transmute(u8x16::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17846,20 +18230,20 @@ pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { + transmute(i16x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17869,20 +18253,20 @@ pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { + transmute(i32x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one 
register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17892,20 +18276,20 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { + transmute(i64x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17915,20 +18299,20 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { + transmute(i8x16::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17938,20 +18322,20 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { + transmute(u16x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17961,20 +18345,20 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { + transmute(u32x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -17984,175 +18368,17 @@ pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { - crate::ptr::read_unaligned(a.cast()) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v1i64<const ALIGN: i32>(a: *const i8) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] - fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; - } - _vld1_v1i64(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2f32<const ALIGN: i32>(a: *const i8) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")] - fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t; - } - _vld1_v2f32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2i32<const ALIGN: i32>(a: *const i8) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")] - fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t; - } - _vld1_v2i32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v4i16<const ALIGN: i32>(a: *const i8) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")] - fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t; - } - _vld1_v4i16(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable =
"neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v8i8<const ALIGN: i32>(a: *const i8) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")] - fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t; - } - _vld1_v8i8(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v16i8<const ALIGN: i32>(a: *const i8) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")] - fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t; - } - _vld1q_v16i8(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v2i64<const ALIGN: i32>(a: *const i8) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")] - fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t; - } - _vld1q_v2i64(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4f32<const ALIGN: i32>(a: *const i8) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")] - fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t; - } - _vld1q_v4f32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch =
"arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4i32<const ALIGN: i32>(a: *const i8) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")] - fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t; - } - _vld1q_v4i32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v8i16<const ALIGN: i32>(a: *const i8) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")] - fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t; - } - _vld1q_v8i16(a, ALIGN) -} -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg(not(target_arch = "arm64ec"))] -unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")] - fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t; - } - _vld1_v4f16(a, b) -} -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg(not(target_arch = "arm64ec"))] -unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")] - fn _vld1q_v8f16(a: *const i8, b: i32)
-> float16x8_t; - } - _vld1q_v8f16(a, b) +pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { + transmute(u64x2::splat(*ptr)) } #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1r) @@ -18165,445 +18391,453 @@ unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t { - let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0))); - simd_shuffle!(x, x, [0, 0]) +pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { + transmute(u8x16::splat(*ptr)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] 
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")] - fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { + let x: poly64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_p64(ptr); } - _vld2_dup_f16(a as _, 2) + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_p64(ptr); + }; + x } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch =
"arm64ec"))] -pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")] - fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { + let x: int64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_s64(ptr); } - _vld2q_dup_f16(a as _, 2) + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_s64(ptr); + }; + x } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ldr) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { - unsafe extern
"unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4f16.p0" - )] - fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t; +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { + let x: uint64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_u64(ptr); } - _vld2_dup_f16(a as _) + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_u64(ptr); + }; + x } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + transmute(vld1_v4f16( + ptr as *const i8, + crate::mem::align_of::<f16>() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc =
"## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) )] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v8f16.p0" - )] - fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t; - } - _vld2q_dup_f16(a as _) +pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")] - fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t; - } - _vld2_dup_f32(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")] - fn 
_vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t; - } - _vld2q_dup_f32(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")] - fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t; - } - _vld2_dup_s8(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub 
unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")] - fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t; - } - _vld2q_dup_s8(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"] +#[doc = "## Safety"] 
+#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")] - fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t; - } - _vld2_dup_s16(a as *const i8, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2f32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] #[doc = "## Safety"] 
#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")] - fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t; - } - _vld2q_dup_s16(a as *const i8, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4f32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")] - fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t; - } - _vld2_dup_s32(a as *const i8, 4) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and 
replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")] - fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t; - } - _vld2q_dup_s32(a as *const i8, 4) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v2f32.p0" 
- )] - fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t; - } - _vld2_dup_f32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4f32.p0" - )] - fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t; - } - _vld2q_dup_f32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "Load multiple 
single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v8i8.p0" - )] - fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t; - } - _vld2_dup_s8(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2i32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v16i8.p0" - )] - fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t; - } - _vld2q_dup_s8(a as _) 
+#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4i32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4i16.p0" - )] - fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t; - } - _vld2_dup_s16(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v1i64::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v8i16.p0" - )] - fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t; - } - _vld2q_dup_s16(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v2i32.p0" - )] - fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t; - } - _vld2_dup_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] 
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4i32.p0" - )] - fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t; - } - _vld2q_dup_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { - transmute(vld2_dup_s64(transmute(a))) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")] - fn 
_vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t; - } - _vld2_dup_s64(a as *const i8, 8) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v1i64.p0" - )] - fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t; - } - _vld2_dup_s64(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,aes")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -18613,21 +18847,20 @@ pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { - transmute(vld2_dup_s64(transmute(a))) +pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -18637,21 +18870,20 @@ pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { - transmute(vld2_dup_s8(transmute(a))) +pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -18661,24 +18893,20 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { - let mut ret_val: uint8x8x2_t = transmute(vld2_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -18688,21 +18916,20 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { - transmute(vld2q_dup_s8(transmute(a))) +pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -18712,32 +18939,20 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { - let mut ret_val: uint8x16x2_t = transmute(vld2q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -18747,21 +18962,61 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { - transmute(vld2_dup_s16(transmute(a))) +pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_lane_f16(ptr: *const f16, src: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + 
simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) )] #[cfg_attr( not(target_arch = "arm"), @@ -18771,24 +19026,22 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { - let mut ret_val: uint16x4x2_t = transmute(vld2_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -18798,21 +19051,22 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { - transmute(vld2q_dup_s16(transmute(a))) +pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -18822,24 +19076,22 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { - let mut ret_val: uint16x8x2_t = transmute(vld2q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); 
- ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -18849,21 +19101,22 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { - transmute(vld2_dup_s32(transmute(a))) +pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 1) )] #[cfg_attr( not(target_arch = "arm"), @@ -18873,24 +19126,22 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { - let mut ret_val: uint32x2x2_t = transmute(vld2_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val +pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vldr, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ldr, LANE = 0) )] #[cfg_attr( not(target_arch = "arm"), @@ -18900,21 +19151,22 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { - transmute(vld2q_dup_s32(transmute(a))) +pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -18924,24 +19176,22 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { - let mut ret_val: uint32x4x2_t = transmute(vld2q_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, 
ret_val.1, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -18951,21 +19201,22 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { - transmute(vld2_dup_s8(transmute(a))) +pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon 
intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 1) )] #[cfg_attr( not(target_arch = "arm"), @@ -18975,24 +19226,22 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { - let mut ret_val: poly8x8x2_t = transmute(vld2_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(ld2r) + assert_instr(ldr, LANE = 0) )] #[cfg_attr( not(target_arch = "arm"), @@ -19002,21 +19251,22 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { - transmute(vld2q_dup_s8(transmute(a))) +pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19026,32 +19276,22 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { - let mut ret_val: poly8x16x2_t = transmute(vld2q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); 
- ret_val +pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -19061,21 +19301,22 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { - transmute(vld2_dup_s16(transmute(a))) +pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic 
unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19085,24 +19326,22 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { - let mut ret_val: poly16x4x2_t = transmute(vld2_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + 
assert_instr(ld1, LANE = 15) )] #[cfg_attr( not(target_arch = "arm"), @@ -19112,21 +19351,22 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { - transmute(vld2q_dup_s16(transmute(a))) +pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19136,812 +19376,507 @@ pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { - let mut ret_val: poly16x8x2_t = transmute(vld2q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> 
int16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")] - fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t; - } - _vld2_f16(a as _, 2) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "Load one single-element 
structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")] - fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t; - } - _vld2q_f16(a as _, 2) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld1, LANE = 15) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4f16.p0" - )] - fn _vld2_f16(ptr: *const f16) -> float16x4x2_t; - } - _vld2_f16(a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld1, LANE = 7) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = 
"fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v8f16.p0" - )] - fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t; - } - _vld2q_f16(a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")] - fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t; - } - _vld2_f32(a as *const i8, 4) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")] - fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t; - } - _vld2q_f32(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")] - fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t; - } - _vld2_s8(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")] - fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t; - } - _vld2q_s8(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 15) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")] - fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t; - } - _vld2_s16(a as *const i8, 2) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr, LANE = 0) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) -> poly64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] +#[doc = "Load one single-element structure to one 
lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")] - fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t; - } - _vld2q_s16(a as *const i8, 2) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,aes")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, 
assert_instr(vld2))] -pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t { + let a: *const i8 = ptr as *const i8; + let b: i32 = crate::mem::align_of::() as i32; unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")] - fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] + fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; } - _vld2_s32(a as *const i8, 4) + transmute(_vld1_v1i64(a, b)) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")] - fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t; - } - _vld2q_s32(a as *const i8, 4) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v2f32.p0" - )] - fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t; - } - _vld2_f32(a as _) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"] #[doc = 
"## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4f32.p0" - )] - fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t; - } - _vld2q_f32(a as _) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v8i8.p0" - )] - fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t; - } - _vld2_s8(a as 
_) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v16i8.p0" - )] - fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t; - } - _vld2q_s8(a as _) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4i16.p0" - )] - fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t; - } - _vld2_s16(a as _) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"] #[doc = "## Safety"] #[doc = " * Neon 
intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v8i16.p0" - )] - fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t; - } - _vld2q_s16(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v8i8::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v2i32.p0" - )] - fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t; - } - _vld2_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { 
+ const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v16i8::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4i32.p0" - )] - fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t; - } - _vld2q_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v4i16::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] 
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")] - fn _vld2_lane_f16( - ptr: *const f16, - a: float16x4_t, - b: float16x4_t, - n: i32, - size: i32, - ) -> float16x4x2_t; - } - _vld2_lane_f16(a as _, b.0, b.1, LANE, 2) +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v8i16::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")] - fn _vld2q_lane_f16( - ptr: *const f16, - a: float16x8_t, - b: float16x8_t, - 
n: i32, - size: i32, - ) -> float16x8x2_t; - } - _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2) +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v2i32::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0" - )] - fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16) - -> float16x4x2_t; - } - _vld2_lane_f16(b.0, b.1, LANE as i64, a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + 
vld1q_v4i32::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0" - )] - fn _vld2q_lane_f16( - a: float16x8_t, - b: float16x8_t, - n: i64, - ptr: *const f16, - ) -> float16x8x2_t; - } - _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - 
link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0" - )] - fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t; - } - _vld2_lane_f32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0" - )] - fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8) - -> float32x4x2_t; - } - _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0" - )] - fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t; - } - _vld2_lane_s8(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element 
structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0" - )] - fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t; - } - _vld2_lane_s16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0" - )] - fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t; - } - _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable 
= "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0" - )] - fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t; - } - _vld2_lane_s32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0" - )] - fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t; - } - _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_f32(a: *const 
f32, b: float32x2x2_t) -> float32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")] - fn _vld2_lane_f32( - ptr: *const i8, - a: float32x2_t, - b: float32x2_t, - n: i32, - size: i32, - ) -> float32x2x2_t; - } - _vld2_lane_f32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")] - fn _vld2q_lane_f32( - ptr: *const i8, - a: float32x4_t, - b: float32x4_t, - n: i32, - size: i32, - ) -> float32x4x2_t; - } - _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")] - fn _vld2q_lane_s16( - ptr: *const i8, - a: 
int16x8_t, - b: int16x8_t, - n: i32, - size: i32, - ) -> int16x8x2_t; - } - _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")] - fn _vld2q_lane_s32( - ptr: *const i8, - a: int32x4_t, - b: int32x4_t, - n: i32, - size: i32, - ) -> int32x4x2_t; - } - _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")] - fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) - -> int8x8x2_t; - } - _vld2_lane_s8(a as _, b.0, b.1, LANE, 1) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { + const ALIGN: i32 = crate::mem::align_of::() as 
i32; + vld1_v1i64::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")] - fn _vld2_lane_s16( - ptr: *const i8, - a: int16x4_t, - b: int16x4_t, - n: i32, - size: i32, - ) -> int16x4x2_t; - } - _vld2_lane_s16(a as _, b.0, b.1, LANE, 2) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")] - fn _vld2_lane_s32( - ptr: *const i8, - a: int32x2_t, - b: int32x2_t, - n: i32, - size: i32, - ) -> int32x2x2_t; - } - _vld2_lane_s32(a as _, b.0, b.1, LANE, 4) +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v2i64::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -19950,23 +19885,21 @@ pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> i target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"] #[doc = "## Safety"] #[doc 
= " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -19975,23 +19908,21 @@ pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uin target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld2_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20000,23 +19931,21 @@ pub unsafe fn vld2_lane_u16(a: *const 
u16, b: uint16x4x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20025,23 +19954,21 @@ pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld2_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"] +#[doc = "Load multiple 
single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20050,23 +19977,21 @@ pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld2q_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + 
assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20075,23 +20000,21 @@ pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20100,23 +20023,21 @@ pub unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> pol target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld2_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { + crate::ptr::read_unaligned(a.cast()) } 
-#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20125,21 +20046,20 @@ pub unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20149,55 +20069,20 @@ pub unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { - transmute(vld2_s64(transmute(a))) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")] - fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t; - } - _vld2_s64(a as *const i8, 8) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v1i64.p0" - )] - fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t; - } - 
_vld2_s64(a as _) +pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20207,20 +20092,20 @@ pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { - transmute(vld2_s64(transmute(a))) +pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20230,20 +20115,20 @@ pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { - transmute(vld2_s8(transmute(a))) +pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20253,20 +20138,20 @@ pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { - transmute(vld2q_s8(transmute(a))) +pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20276,20 +20161,20 @@ pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { - transmute(vld2_s16(transmute(a))) +pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20299,20 +20184,20 @@ pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { - 
transmute(vld2q_s16(transmute(a))) +pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20322,20 +20207,20 @@ pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { - transmute(vld2_s32(transmute(a))) +pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20345,20 +20230,20 @@ pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { - transmute(vld2q_s32(transmute(a))) +pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20368,20 +20253,20 @@ pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { - transmute(vld2_s8(transmute(a))) +pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or 
four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20391,20 +20276,20 @@ pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { - transmute(vld2q_s8(transmute(a))) +pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20414,20 +20299,20 @@ pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_p16(a: *const p16) -> 
poly16x4x2_t { - transmute(vld2_s16(transmute(a))) +pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20437,405 +20322,296 @@ pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { - transmute(vld2q_s16(transmute(a))) +pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")] - fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t; - } - _vld3_dup_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")] - fn _vld3q_dup_f16(ptr: *const f16, size: 
i32) -> float16x8x3_t; - } - _vld3q_dup_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v4f16.p0" - )] - fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t; - } - _vld3_dup_f16(a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v8f16.p0" - )] - fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t; - } - _vld3q_dup_f16(a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v2f32.p0" - )] - fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t; - } - _vld3_dup_f32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name 
= "llvm.aarch64.neon.ld3r.v4f32.p0" - )] - fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t; - } - _vld3q_dup_f32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v8i8.p0" - )] - fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t; - } - _vld3_dup_s8(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v16i8.p0" - )] - fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t; - } - _vld3q_dup_s8(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v4i16.p0" - )] - fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t; - } - _vld3_dup_s16(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.ld3r.v8i16.p0" - )] - fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t; - } - _vld3q_dup_s16(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v2i32.p0" - )] - fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t; - } - _vld3_dup_s32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v4i32.p0" - )] - fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t; - } - _vld3q_dup_s32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v1i64.p0" - )] - fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t; - } - _vld3_dup_s64(a as _) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")] - fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t; - } - _vld3_dup_f32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")] - fn 
_vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t; - } - _vld3q_dup_f32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")] - fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t; - } - _vld3_dup_s8(a as *const i8, 1) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")] - fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t; - } - _vld3q_dup_s8(a as *const i8, 1) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, 
assert_instr(vld3))] -pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")] - fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t; - } - _vld3_dup_s16(a as *const i8, 2) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")] - fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t; - } - _vld3q_dup_s16(a as *const i8, 2) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")] - fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t; - } - _vld3_dup_s32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] -#[doc = "## Safety"] -#[doc 
= " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")] - fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t; - } - _vld3q_dup_s32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20845,36 +20621,20 @@ pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { - transmute(vld3_dup_s64(transmute(a))) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld3_dup_s64(a: 
*const i64) -> int64x1x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")] - fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t; - } - _vld3_dup_s64(a as *const i8, 8) +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20884,21 +20644,20 @@ pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { - transmute(vld3_dup_s64(transmute(a))) +pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] 
#[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20908,21 +20667,20 @@ pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { - transmute(vld3_dup_s8(transmute(a))) +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20932,25 +20690,20 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { - let 
mut ret_val: uint8x8x3_t = transmute(vld3_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20960,21 +20713,20 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { - transmute(vld3q_dup_s8(transmute(a))) +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20984,37 +20736,20 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { - let mut ret_val: uint8x16x3_t = transmute(vld3q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21024,21 +20759,20 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { - transmute(vld3_dup_s16(transmute(a))) +pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21048,25 +20782,20 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { - let mut ret_val: uint16x4x3_t = transmute(vld3_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = 
simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21076,21 +20805,20 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { - transmute(vld3q_dup_s16(transmute(a))) +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21100,25 +20828,20 @@ pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { - let mut ret_val: uint16x8x3_t = transmute(vld3q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21128,21 +20851,20 @@ pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { - transmute(vld3_dup_s32(transmute(a))) +pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21152,25 +20874,20 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { - let mut ret_val: uint32x2x3_t = transmute(vld3_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val +pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] 
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21180,21 +20897,20 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { - transmute(vld3q_dup_s32(transmute(a))) +pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21204,25 +20920,20 @@ 
pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { - let mut ret_val: uint32x4x3_t = transmute(vld3q_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21232,21 +20943,20 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { - transmute(vld3_dup_s8(transmute(a))) +pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21256,25 +20966,20 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { - let mut ret_val: poly8x8x3_t = transmute(vld3_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21284,21 +20989,20 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { - transmute(vld3q_dup_s8(transmute(a))) +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21308,37 +21012,20 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { - let mut ret_val: poly8x16x3_t = transmute(vld3q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21348,21 +21035,20 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { - transmute(vld3_dup_s16(transmute(a))) +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21372,25 +21058,20 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { - let mut ret_val: poly16x4x3_t = transmute(vld3_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21400,21 +21081,20 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { - transmute(vld3q_dup_s16(transmute(a))) +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21424,365 +21104,369 @@ pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { - let mut ret_val: poly16x8x3_t = transmute(vld3q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_p8_x4(a: *const p8) -> 
poly8x16x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")] - fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t; - } - _vld3_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")] - fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t; - } - _vld3q_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(ld3) + assert_instr(ld) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { - crate::core_arch::macros::deinterleaving_load!(f16, 4, 3, a) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) + assert_instr(ld) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { - crate::core_arch::macros::deinterleaving_load!(f16, 8, 3, a) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] 
-#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { - crate::core_arch::macros::deinterleaving_load!(f32, 2, 3, a) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { - crate::core_arch::macros::deinterleaving_load!(f32, 4, 3, a) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { - crate::core_arch::macros::deinterleaving_load!(i8, 8, 3, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { - crate::core_arch::macros::deinterleaving_load!(i8, 16, 3, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { - crate::core_arch::macros::deinterleaving_load!(i16, 4, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v1i64(a: *const i8) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] + fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; + } + _vld1_v1i64(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { - crate::core_arch::macros::deinterleaving_load!(i16, 8, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v2f32(a: *const i8) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")] + fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t; + } + _vld1_v2f32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] 
#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { - crate::core_arch::macros::deinterleaving_load!(i32, 2, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v2i32(a: *const i8) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")] + fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t; + } + _vld1_v2i32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { - crate::core_arch::macros::deinterleaving_load!(i32, 4, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v4i16(a: *const i8) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")] + fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t; + } + _vld1_v4i16(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] 
-#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { +unsafe fn vld1_v8i8(a: *const i8) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")] - fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")] + fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t; } - _vld3_f32(a as *const i8, 4) + _vld1_v8i8(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { +unsafe fn vld1q_v16i8(a: *const i8) -> int8x16_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")] - fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")] + fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t; } - _vld3q_f32(a as *const i8, 4) + _vld1q_v16i8(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { +unsafe fn vld1q_v2i64(a: *const i8) -> int64x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")] - fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")] + fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t; } - _vld3_s8(a as *const i8, 1) + _vld1q_v2i64(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { +unsafe fn vld1q_v4f32(a: *const i8) -> float32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")] - fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")] + fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t; } - _vld3q_s8(a as *const i8, 1) + _vld1q_v4f32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { +unsafe fn vld1q_v4i32(a: *const i8) -> int32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")] - fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")] + fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t; } - _vld3_s16(a as *const i8, 2) + _vld1q_v4i32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { +unsafe fn vld1q_v8i16(a: *const i8) -> int16x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")] - fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")] + fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t; } - _vld3q_s16(a as *const i8, 2) + _vld1q_v8i16(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { +#[cfg(not(target_arch = "arm64ec"))] +unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")] - fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")] + fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t; } - _vld3_s32(a as *const i8, 4) + _vld1_v4f16(a, b) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { +#[cfg(not(target_arch = "arm64ec"))] +unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")] - fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")] + fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t; } - _vld3q_s32(a as *const i8, 4) + _vld1q_v8f16(a, b) } 
-#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t { + let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { - static_assert_uimm_bits!(LANE, 
2); +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")] - fn _vld3_lane_f16( - ptr: *const f16, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - n: i32, - size: i32, - ) -> float16x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")] + fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t; } - _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2_dup_f16(a as _, 2) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")] - fn _vld3q_lane_f16( - ptr: *const f16, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - n: i32, - size: i32, - ) -> float16x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld2dup.v8f16.p0")] + fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t; } - _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2q_dup_f16(a as _, 2) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] @@ -21790,31 +21474,23 @@ pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) - #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { - static_assert_uimm_bits!(LANE, 2); +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0" + link_name = "llvm.aarch64.neon.ld2r.v4f16.p0" )] - fn _vld3_lane_f16( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - n: i64, - ptr: *const f16, - ) -> float16x4x3_t; + fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t; } - _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_f16(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] @@ -21822,381 +21498,371 @@ pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0" + link_name = "llvm.aarch64.neon.ld2r.v8f16.p0" )] - fn _vld3q_lane_f16( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - n: i64, - ptr: *const f16, - ) -> float16x8x3_t; + fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t; } - _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_f16(a as _) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_f32(a: *const f32, b: 
float32x2x3_t) -> float32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0" - )] - fn _vld3_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - n: i64, - ptr: *const i8, - ) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")] + fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t; } - _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.ld3lane.v4f32.p0" - )] - fn _vld3q_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - n: i64, - ptr: *const i8, - ) -> float32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")] + fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t; } - _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")] - fn _vld3_lane_f32( - ptr: *const i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - n: i32, - size: i32, - ) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")] + fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t; } - _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) + _vld2_dup_s8(a as *const i8, 1) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "Load 
single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { - static_assert_uimm_bits!(LANE, 3); +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")] + fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t; + } + _vld2q_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")] + fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t; + } + _vld2_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * 
Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")] + fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t; + } + _vld2q_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")] + fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t; + } + _vld2_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")] + fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t; + } + _vld2q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure 
and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0" + link_name = "llvm.aarch64.neon.ld2r.v2f32.p0" )] - fn _vld3_lane_s8( - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - n: i64, - ptr: *const i8, - ) -> int8x8x3_t; + fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t; } - _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_f32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0" + link_name = "llvm.aarch64.neon.ld2r.v4f32.p0" )] - fn _vld3_lane_s16( 
- a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - n: i64, - ptr: *const i8, - ) -> int16x4x3_t; + fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t; } - _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_f32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { - static_assert_uimm_bits!(LANE, 4); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0" + link_name = "llvm.aarch64.neon.ld2r.v8i8.p0" )] - fn _vld3q_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - n: i64, - ptr: *const i8, - ) -> int16x8x3_t; + fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t; } - _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_s8(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] 
#[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0" + link_name = "llvm.aarch64.neon.ld2r.v16i8.p0" )] - fn _vld3_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - n: i64, - ptr: *const i8, - ) -> int32x2x3_t; + fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t; } - _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_s8(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0" + link_name = "llvm.aarch64.neon.ld2r.v4i16.p0" )] - fn _vld3q_lane_s32( - a: int32x4_t, - b: 
int32x4_t, - c: int32x4_t, - n: i64, - ptr: *const i8, - ) -> int32x4x3_t; + fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t; } - _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_s16(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { - static_assert_uimm_bits!(LANE, 3); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")] - fn _vld3_lane_s8( - ptr: *const i8, - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - n: i32, - size: i32, - ) -> int8x8x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8i16.p0" + )] + fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t; } - _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) + _vld2q_dup_s16(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")] - fn _vld3_lane_s16( - ptr: *const i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - n: i32, - size: i32, - ) -> int16x4x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2i32.p0" + )] + fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t; } - _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2_dup_s32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { - 
static_assert_uimm_bits!(LANE, 3); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")] - fn _vld3q_lane_s16( - ptr: *const i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - n: i32, - size: i32, - ) -> int16x8x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4i32.p0" + )] + fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t; } - _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2q_dup_s32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")] - fn _vld3_lane_s32( - ptr: *const i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - n: i32, - size: i32, - ) -> int32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")] + fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t; } - _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) + _vld2_dup_s64(a as *const i8, 8) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")] - fn _vld3q_lane_s32( - ptr: *const i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - n: i32, - size: i32, - ) -> int32x4x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v1i64.p0" + )] + fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t; } - _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) + _vld2_dup_s64(a as _) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22205,23 +21871,21 @@ pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { + 
transmute(vld2_dup_s64(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22230,23 +21894,21 @@ pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uin target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22255,23 +21917,21 @@ pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22280,23 +21940,21 @@ pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub 
unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld3_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22305,23 +21963,21 @@ pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3q_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22330,23 +21986,21 @@ pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_dup_s32(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( 
not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22355,23 +22009,21 @@ pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> pol target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_dup_s32(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22380,21 +22032,20 @@ pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to 
three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld2r) )] #[cfg_attr( not(target_arch = "arm"), @@ -22404,232 +22055,20 @@ pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { - transmute(vld3_s64(transmute(a))) +pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] -#[doc = "## Safety"] -#[doc = " * 
Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")] - fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t; - } - _vld3_s64(a as *const i8, 8) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { - transmute(vld3_s64(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { - transmute(vld3_s8(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { - transmute(vld3q_s8(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { - transmute(vld3_s16(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"] -#[doc = "## Safety"] 
-#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { - transmute(vld3q_s16(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { - transmute(vld3_s32(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - 
not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { - transmute(vld3q_s32(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { - transmute(vld3_s8(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) + assert_instr(ld2r) )] #[cfg_attr( not(target_arch = "arm"), @@ -22639,20 +22078,20 @@ pub unsafe fn 
vld3_p8(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { - transmute(vld3q_s8(transmute(a))) +pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) + assert_instr(ld2r) )] #[cfg_attr( not(target_arch = "arm"), @@ -22662,818 +22101,809 @@ pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { - transmute(vld3_s16(transmute(a))) +pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { - transmute(vld3q_s16(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")] - fn _vld3q_lane_f32( - ptr: *const i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - n: i32, - size: i32, - ) -> float32x4x3_t; - } - _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vld2))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")] - fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")] + fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t; } - _vld4_dup_f16(a as _, 2) + _vld2_f16(a as _, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")] - fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")] + fn 
_vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t; } - _vld4q_dup_f16(a as _, 2) + _vld2q_f16(a as _, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2) )] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4f16.p0" + link_name = "llvm.aarch64.neon.ld2.v4f16.p0" )] - fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t; + fn _vld2_f16(ptr: *const f16) -> float16x4x2_t; } - _vld4_dup_f16(a as _) + _vld2_f16(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2) )] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v8f16.p0" + link_name = "llvm.aarch64.neon.ld2.v8f16.p0" )] - fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t; + fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t; } - _vld4q_dup_f16(a as _) + _vld2q_f16(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")] - fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")] + fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t; } - _vld4_dup_f32(a as *const i8, 4) + _vld2_f32(a as *const i8, 4) } -#[doc = "Load 
single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")] - fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")] + fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t; } - _vld4q_dup_f32(a as *const i8, 4) + _vld2q_f32(a as *const i8, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { unsafe extern 
"unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")] - fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")] + fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t; } - _vld4_dup_s8(a as *const i8, 1) + _vld2_s8(a as *const i8, 1) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")] - fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")] + fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t; } - _vld4q_dup_s8(a as *const i8, 1) + _vld2q_s8(a as *const i8, 1) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] 
-#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")] - fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")] + fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t; } - _vld4_dup_s16(a as *const i8, 2) + _vld2_s16(a as *const i8, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")] - fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")] + fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t; } - _vld4q_dup_s16(a as *const i8, 2) + _vld2q_s16(a as *const i8, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of 
four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")] - fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")] + fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t; } - _vld4_dup_s32(a as *const i8, 4) + _vld2_s32(a as *const i8, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld4dup.v4i32.p0")] - fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")] + fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t; } - _vld4q_dup_s32(a as *const i8, 4) + _vld2q_s32(a as *const i8, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v2f32.p0" )] - fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t; + fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t; } - _vld4_dup_f32(a as _) + _vld2_f32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v4f32.p0" )] - fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t; + fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t; } - _vld4q_dup_f32(a as _) + _vld2q_f32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v8i8.p0" )] - fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t; + fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t; } - _vld4_dup_s8(a as _) + _vld2_s8(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v16i8.p0" )] - fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t; + fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t; } - _vld4q_dup_s8(a as _) + _vld2q_s8(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v4i16.p0" )] - fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t; + fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t; } - _vld4_dup_s16(a as _) + _vld2_s16(a as _) } -#[doc = "Load single 4-element structure and 
replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v8i16.p0" )] - fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t; + fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t; } - _vld4q_dup_s16(a as _) + _vld2q_s16(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0" + 
link_name = "llvm.aarch64.neon.ld2.v2i32.p0" )] - fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t; + fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t; } - _vld4_dup_s32(a as _) + _vld2_s32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v4i32.p0" )] - fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t; + fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t; } - _vld4q_dup_s32(a as _) + _vld2q_s32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s64(a: *const i64) -> 
int64x1x4_t { +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0" - )] - fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")] + fn _vld2_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x2_t; } - _vld4_dup_s64(a as _) -} -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t { - transmute(vld4_dup_s64(transmute(a))) + _vld2_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")] - fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")] + fn _vld2q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x2_t; } - _vld4_dup_s64(a as *const i8, 8) + _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld2, LANE = 0) )] -pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t { - transmute(vld4_dup_s64(transmute(a))) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0" + )] + fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16) + -> float16x4x2_t; + } + _vld2_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] 
-#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld2, LANE = 0) )] -pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { - transmute(vld4_dup_s8(transmute(a))) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0" + )] + fn _vld2q_lane_f16( + a: float16x8_t, + b: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t 
{ - let mut ret_val: uint8x8x4_t = transmute(vld4_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0" + )] + fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t; + } + _vld2_lane_f32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u8(a: *const 
u8) -> uint8x16x4_t { - transmute(vld4q_dup_s8(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0" + )] + fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8) + -> float32x4x2_t; + } + _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t { - let mut ret_val: uint8x16x4_t = transmute(vld4q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - 
ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.3 = simd_shuffle!( - ret_val.3, - ret_val.3, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0" + )] + fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t; + } + _vld2_lane_s8(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { - transmute(vld4_dup_s16(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] 
+#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0" + )] + fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t; + } + _vld2_lane_s16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { - let mut ret_val: uint16x4x4_t = transmute(vld4_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0" + )] + fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t; + } + _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { - transmute(vld4q_dup_s16(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0" + )] + fn 
_vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t; + } + _vld2_lane_s32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { - let mut ret_val: uint16x8x4_t = transmute(vld4q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.ld2lane.v4i32.p0" + )] + fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t; + } + _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { - transmute(vld4_dup_s32(transmute(a))) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")] + fn _vld2_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x2_t; + } + _vld2_lane_f32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { - let mut ret_val: uint32x2x4_t = transmute(vld4_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")] + fn _vld2q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x2_t; + } + _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { - transmute(vld4q_dup_s32(transmute(a))) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")] + fn _vld2q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x2_t; + } + _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] +#[doc = "## Safety"] 
+#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")] + fn _vld2q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x2_t; + } + _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")] + fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) + -> int8x8x2_t; + } + _vld2_lane_s8(a as _, b.0, b.1, LANE, 1) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn 
vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")] + fn _vld2_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x2_t; + } + _vld2_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")] + fn _vld2_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x2_t; + } + _vld2_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0") @@ -23482,27 +22912,23 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { - let mut ret_val: uint32x4x4_t = transmute(vld4q_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23511,22 +22937,23 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_p8(a: *const p8) -> 
poly8x8x4_t { - transmute(vld4_dup_s8(transmute(a))) +pub unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23535,27 +22962,23 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { - let mut ret_val: poly8x8x4_t = transmute(vld4_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2q_lane_s16::(transmute(a), 
transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23564,22 +22987,23 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { - transmute(vld4q_dup_s8(transmute(a))) +pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld2_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23588,43 +23012,23 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { - let mut ret_val: poly8x16x4_t = transmute(vld4q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.3 = simd_shuffle!( - ret_val.3, - ret_val.3, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2q_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23633,22 +23037,23 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { - transmute(vld4_dup_s16(transmute(a))) +pub unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23657,27 +23062,23 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { - let mut ret_val: poly16x4x4_t = transmute(vld4_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23686,21 +23087,21 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { - transmute(vld4q_dup_s16(transmute(a))) +pub unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + 
transmute(vld2q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -23710,817 +23111,694 @@ pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { - let mut ret_val: poly16x8x4_t = transmute(vld4q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_s64(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "Load multiple 2-element 
structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")] - fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")] + fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t; } - _vld4_f16(a as _, 2) + _vld2_s64(a as *const i8, 8) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = 
"arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")] - fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v1i64.p0" + )] + fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t; } - _vld4q_f16(a as _, 2) + _vld2_s64(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(nop) )] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { - crate::core_arch::macros::deinterleaving_load!(f16, 4, 4, a) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_s64(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(ld2) )] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { - crate::core_arch::macros::deinterleaving_load!(f16, 8, 4, a) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { - crate::core_arch::macros::deinterleaving_load!(f32, 2, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { - crate::core_arch::macros::deinterleaving_load!(f32, 4, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { - crate::core_arch::macros::deinterleaving_load!(i8, 8, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { - crate::core_arch::macros::deinterleaving_load!(i8, 16, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { - crate::core_arch::macros::deinterleaving_load!(i16, 4, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] 
-#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { - crate::core_arch::macros::deinterleaving_load!(i16, 8, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { - crate::core_arch::macros::deinterleaving_load!(i32, 2, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_s8(transmute(a))) } 
-#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { - crate::core_arch::macros::deinterleaving_load!(i32, 4, 4, a) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")] - fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t; - } - _vld4_f32(a as *const i8, 4) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")] - fn 
_vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t; - } - _vld4q_f32(a as *const i8, 4) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")] - fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t; - } - _vld4_s8(a as *const i8, 1) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")] - fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t; - } - _vld4q_s8(a as *const i8, 1) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")] - fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t; - } - _vld4_s16(a as *const i8, 2) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")] - fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t; - } - _vld4q_s16(a as *const i8, 2) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")] - fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t; - } - _vld4_s32(a as *const i8, 4) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p16(a: 
*const p16) -> poly16x4x2_t { + transmute(vld2_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")] - fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t; - } - _vld4q_s32(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")] - fn _vld4_lane_f16( - ptr: *const f16, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i32, - size: i32, - ) -> float16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")] + fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t; } - _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3_dup_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")] - fn _vld4q_lane_f16( - ptr: *const f16, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i32, - size: i32, - ) -> float16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")] + fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t; } - _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3q_dup_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { unsafe extern "unadjusted" { #[cfg_attr( 
any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0" + link_name = "llvm.aarch64.neon.ld3r.v4f16.p0" )] - fn _vld4_lane_f16( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i64, - ptr: *const f16, - ) -> float16x4x4_t; + fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t; } - _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_f16(a as _) } -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0" + link_name = "llvm.aarch64.neon.ld3r.v8f16.p0" )] - fn _vld4q_lane_f16( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i64, - ptr: *const f16, - ) -> float16x8x4_t; + fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t; } - _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_f16(a as 
_) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0" + link_name = "llvm.aarch64.neon.ld3r.v2f32.p0" )] - fn _vld4_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i64, - ptr: *const i8, - ) -> float32x2x4_t; + fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t; } - _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_f32(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0" + link_name = "llvm.aarch64.neon.ld3r.v4f32.p0" )] - fn _vld4q_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i64, - ptr: *const i8, - ) -> float32x4x4_t; + fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t; } - _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_f32(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0" + link_name = "llvm.aarch64.neon.ld3r.v8i8.p0" )] - fn _vld4_lane_s8( - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - n: i64, - ptr: *const i8, - ) -> int8x8x4_t; + fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t; } - 
_vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_s8(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0" + link_name = "llvm.aarch64.neon.ld3r.v16i8.p0" )] - fn _vld4_lane_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i64, - ptr: *const i8, - ) -> int16x4x4_t; + fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t; } - _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_s8(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] 
-#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0" + link_name = "llvm.aarch64.neon.ld3r.v4i16.p0" )] - fn _vld4q_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i64, - ptr: *const i8, - ) -> int16x8x4_t; + fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t; } - _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_s16(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0" + link_name = "llvm.aarch64.neon.ld3r.v8i16.p0" )] - fn _vld4_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i64, - ptr: *const i8, - ) -> int32x2x4_t; + fn 
_vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t; } - _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_s16(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0" + link_name = "llvm.aarch64.neon.ld3r.v2i32.p0" )] - fn _vld4q_lane_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i64, - ptr: *const i8, - ) -> int32x4x4_t; + fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t; } - _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_s32(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = 
"arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")] - fn _vld4_lane_f32( - ptr: *const i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i32, - size: i32, - ) -> float32x2x4_t; - } - _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4i32.p0" + )] + fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t; + } + _vld3q_dup_s32(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v1i64.p0" + )] + fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t; + } + _vld3_dup_s64(a as _) +} +#[doc = "Load single 
3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")] - fn _vld4q_lane_f32( - ptr: *const i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i32, - size: i32, - ) -> float32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")] + fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t; } - _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vld3_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { - static_assert_uimm_bits!(LANE, 3); 
+#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")] - fn _vld4_lane_s8( - ptr: *const i8, - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - n: i32, - size: i32, - ) -> int8x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")] + fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t; } - _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) + _vld3q_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")] - fn _vld4_lane_s16( - ptr: *const i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i32, - size: i32, - ) -> int16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")] + fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t; } - _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3_dup_s8(a as *const i8, 1) } -#[doc = "Load multiple 4-element 
structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")] - fn _vld4q_lane_s16( - ptr: *const i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i32, - size: i32, - ) -> int16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")] + fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t; } - _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3q_dup_s8(a as *const i8, 1) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")] - fn _vld4_lane_s32( - ptr: *const i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i32, - size: i32, - ) -> int32x2x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")] + fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t; } - _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vld3_dup_s16(a as *const i8, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")] - fn _vld4q_lane_s32( - ptr: *const i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i32, - size: i32, - ) -> int32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")] + fn 
_vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t; } - _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vld3q_dup_s16(a as *const i8, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")] + fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + } + _vld3_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")] + fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + } + _vld3q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 
3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24529,23 +23807,37 @@ pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")] + fn _vld3_dup_s64(ptr: *const i8, size: i32) -> 
int64x1x3_t; + } + _vld3_dup_s64(a as *const i8, 8) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24554,23 +23846,21 @@ pub unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uin target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_u16(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24579,23 +23869,21 @@ pub unsafe fn vld4_lane_u16(a: *const u16, b: uint16x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24604,23 +23892,21 @@ pub unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld4_lane_s32::(transmute(a), transmute(b))) 
+pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24629,23 +23915,21 @@ pub unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4q_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24654,23 +23938,21 @@ pub unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24679,23 +23961,21 @@ pub unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> pol target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_dup_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24704,21 +23984,20 @@ pub unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_dup_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"] +#[doc = "Load single 3-element structure and replicate to 
all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -24728,48 +24007,20 @@ pub unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t { - transmute(vld4_s64(transmute(a))) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { - unsafe extern 
"unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")] - fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t; - } - _vld4_s64(a as *const i8, 8) +pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -24779,20 +24030,20 @@ pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { - transmute(vld4_s64(transmute(a))) +pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -24802,20 +24053,20 @@ pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { - transmute(vld4_s8(transmute(a))) +pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -24825,416 +24076,775 @@ pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { - transmute(vld4q_s8(transmute(a))) +pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { - transmute(vld4_s16(transmute(a))) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")] + fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { - transmute(vld4q_s16(transmute(a))) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")] + fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld3) )] -pub unsafe fn vld4_u32(a: 
*const u32) -> uint32x2x4_t { - transmute(vld4_s32(transmute(a))) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + crate::core_arch::macros::deinterleaving_load!(f16, 4, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(ld3) )] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { - transmute(vld4q_s32(transmute(a))) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + crate::core_arch::macros::deinterleaving_load!(f16, 8, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { - transmute(vld4_s8(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + crate::core_arch::macros::deinterleaving_load!(f32, 2, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { - transmute(vld4q_s8(transmute(a))) +#[stable(feature 
= "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + crate::core_arch::macros::deinterleaving_load!(f32, 4, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { - transmute(vld4_s16(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + crate::core_arch::macros::deinterleaving_load!(i8, 8, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { - transmute(vld4q_s16(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + crate::core_arch::macros::deinterleaving_load!(i8, 16, 3, a) } -#[doc = "Store SIMD&FP register (immediate offset)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vldrq_p128(a: *const p128) -> p128 { - *a +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + crate::core_arch::macros::deinterleaving_load!(i16, 4, 3, a) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + crate::core_arch::macros::deinterleaving_load!(i16, 8, 3, a) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + crate::core_arch::macros::deinterleaving_load!(i32, 2, 3, a) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + crate::core_arch::macros::deinterleaving_load!(i32, 4, 3, a) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")] + fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t; + } + _vld3_f32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")] + fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t; + } + _vld3q_f32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, 
assert_instr(vld3))] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")] + fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t; + } + _vld3_s8(a as *const i8, 1) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")] + fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t; + } + _vld3q_s8(a as *const i8, 1) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")] + fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t; + } + _vld3_s16(a as *const i8, 2) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")] + fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t; + } + _vld3q_s16(a as *const i8, 2) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")] + fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + } + _vld3_s32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")] + fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + } + _vld3q_s32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v4f16" - )] - fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")] + fn _vld3_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x3_t; } - unsafe { _vmax_f16(a, b) } + _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe 
fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")] + fn _vld3q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v8f16" + link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0" )] - fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + fn _vld3_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x3_t; } - unsafe { _vmaxq_f16(a, b) 
} + _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld3, LANE = 0) )] -pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v2f32" + link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0" )] - fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + fn _vld3q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x3_t; } - unsafe { _vmax_f32(a, b) } + _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v4f32" + link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0" )] - fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + fn _vld3_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i64, + ptr: *const i8, + ) -> float32x2x3_t; } - unsafe { _vmaxq_f32(a, b) } + _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { - let mask: int8x8_t = simd_ge(a, b); - simd_select(mask, a, b) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0" + )] + fn _vld3q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i64, + ptr: *const i8, + ) -> float32x4x3_t; } + _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")] + fn _vld3_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x3_t; + } + _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { - let mask: int8x16_t = simd_ge(a, b); - simd_select(mask, a, b) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0" + )] + fn _vld3_lane_s8( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + n: i64, + ptr: *const i8, + ) -> int8x8x3_t; } + _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { - let mask: int16x4_t = simd_ge(a, b); - simd_select(mask, a, b) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0" + )] + fn _vld3_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i64, + ptr: *const i8, + ) -> int16x4x3_t; } + _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_uimm_bits!(LANE, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0" + )] + fn _vld3q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i64, + ptr: *const i8, + ) -> int16x8x3_t; + } + _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0" + )] + fn _vld3_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i64, + ptr: *const i8, + ) -> int32x2x3_t; + } + _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0" + )] + fn _vld3q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i64, + ptr: *const i8, + ) -> int32x4x3_t; + } + _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")] + fn _vld3_lane_s8( + ptr: *const i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + n: i32, + size: i32, + ) -> int8x8x3_t; + } + _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")] + fn _vld3_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x3_t; + } + _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")] + fn _vld3q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x3_t; + } + _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")] + fn _vld3_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x3_t; + } + 
_vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")] + fn _vld3q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x3_t; + } + _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25243,22 +24853,23 @@ pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { - let mask: int16x8_t = simd_ge(a, b); - simd_select(mask, 
a, b) - } +pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25267,22 +24878,23 @@ pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { - let mask: int32x2_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25291,22 +24903,23 @@ pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { - let mask: int32x4_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25315,22 +24928,23 @@ pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { - let mask: uint8x8_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld3_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25339,22 +24953,23 @@ pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { - let mask: uint8x16_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3q_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25363,22 +24978,23 @@ pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { - let mask: uint16x4_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25387,22 +25003,23 @@ pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { - let mask: uint16x8_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25411,21 +25028,21 @@ pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { - let mask: uint32x2_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -25435,110 +25052,48 @@ pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { - let mask: uint32x4_t = simd_ge(a, b); - simd_select(mask, a, b) - } -} -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v4f16" - )] - fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; - } - unsafe { _vmaxnm_f16(a, b) } +pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_s64(transmute(a))) } -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v8f16" - )] - fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; - } - unsafe { _vmaxnmq_f16(a, b) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { + 
crate::ptr::read_unaligned(a.cast()) } -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v2f32" - )] - fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")] + fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t; } - unsafe { _vmaxnm_f32(a, b) } + _vld3_s64(a as *const i8, 8) } -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -25548,86 +25103,20 @@ pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v4f32" - )] - fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - unsafe { _vmaxnmq_f32(a, b) } -} -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v4f16" - )] - fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; - } - unsafe { _vmin_f16(a, b) } -} -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v8f16" - )] - fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; - } - unsafe { _vminq_f16(a, b) } +pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_s64(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25637,26 +25126,20 @@ pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v2f32" - )] - fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vmin_f32(a, b) } +pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25666,26 +25149,20 @@ pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vmins.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v4f32" - )] - fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - unsafe { _vminq_f32(a, b) } +pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25695,21 +25172,20 @@ pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { - let mask: int8x8_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_s16(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25719,21 +25195,20 @@ pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { - let mask: int8x16_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_s16(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25743,21 +25218,20 @@ pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { - let mask: int16x4_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_s32(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25767,21 +25241,20 @@ pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { - let mask: int16x8_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_s32(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25791,21 +25264,20 @@ pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> 
int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { - let mask: int32x2_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25815,21 +25287,20 @@ pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { - let mask: int32x4_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25839,21 +25310,20 @@ pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { - let mask: uint8x8_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_s16(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -25863,177 +25333,426 @@ pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { - let mask: uint8x16_t = simd_le(a, b); - simd_select(mask, a, b) +pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_s16(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")] + fn _vld3q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x3_t; } + _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { - let mask: uint16x4_t = simd_le(a, b); - simd_select(mask, a, b) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")] + fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t; } + _vld4_dup_f16(a as _, 2) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { - let mask: uint16x8_t = simd_le(a, b); - simd_select(mask, a, b) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")] + fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t; } + _vld4q_dup_f16(a as _, 2) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4r) )] -pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { - let mask: uint32x2_t = simd_le(a, b); - simd_select(mask, a, b) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4f16.p0" + )] + fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t; } + _vld4_dup_f16(a as _) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(ld4r) )] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { - let mask: uint32x4_t = simd_le(a, b); - simd_select(mask, a, b) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8f16.p0" + )] + fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t; } + _vld4q_dup_f16(a as _) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] 
-pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")] + fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + } + _vld4_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")] + fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + } + _vld4q_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")] + fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + } + _vld4_dup_s8(a as *const i8, 1) +} +#[doc = 
"Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")] + fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + } + _vld4q_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")] + fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + } + _vld4_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")] + fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + } + _vld4q_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")] + fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t; + } + _vld4_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")] + fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + } + _vld4q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] 
+#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v4f16" + link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0" )] - fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t; } - unsafe { _vminnm_f16(a, b) } + _vld4_dup_f32(a as _) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")] 
#[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v8f16" + link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0" )] - fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t; } - unsafe { _vminnmq_f16(a, b) } + _vld4q_dup_f32(a as _) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0" + )] + fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t; + } + _vld4_dup_s8(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0" + )] + fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t; + } + _vld4q_dup_s8(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0" + )] + fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t; + } + _vld4_dup_s16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0" + )] + fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t; + } + _vld4q_dup_s16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, 
assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0" + )] + fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t; + } + _vld4_dup_s32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0" + )] + fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t; + } + _vld4q_dup_s32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0" + )] + fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t; + } + _vld4_dup_s64(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26043,26 +25762,36 @@ pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v2f32" - )] - fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")] + fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t; } - unsafe { _vminnm_f32(a, b) } + _vld4_dup_s64(a as *const i8, 8) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26072,26 +25801,20 @@ pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v4f32" - )] - fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - unsafe { _vminnmq_f32(a, b) } +pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) } -#[doc = "Floating-point multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26101,18 +25824,20 @@ pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) } -#[doc = "Floating-point multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26122,20 +25847,21 @@ pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t { 
+ transmute(vld4q_dup_s8(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26144,25 +25870,21 @@ pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmla_f32(a, b, vdup_lane_f32::(c)) +pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26171,25 +25893,21 @@ pub fn vmla_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmla_f32(a, b, vdup_laneq_f32::(c)) +pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26198,25 +25916,21 @@ pub fn vmla_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x2_t, 
-) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlaq_f32(a, b, vdupq_lane_f32::(c)) +pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_dup_s32(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26225,25 +25939,21 @@ pub fn vmlaq_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_f32(a, b, vdupq_laneq_f32::(c)) +pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_dup_s32(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic 
unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26252,21 +25962,21 @@ pub fn vmlaq_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmla_s16(a, b, vdup_lane_s16::(c)) +pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26275,21 +25985,21 @@ pub fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmla_u16(a, b, vdup_lane_u16::(c)) +pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26298,21 +26008,21 @@ pub fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmla_s16(a, b, vdup_laneq_s16::(c)) +pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26321,632 +26031,812 @@ pub fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmla_u16(a, b, vdup_laneq_u16::(c)) +pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] 
-#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_s16(a, b, vdupq_lane_s16::(c)) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")] + fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + } + _vld4_f16(a as _, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_u16(a, b, vdupq_lane_u16::(c)) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")] + fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + } + _vld4q_f16(a as _, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4) )] -pub fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlaq_s16(a, b, vdupq_laneq_s16::(c)) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + crate::core_arch::macros::deinterleaving_load!(f16, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4) )] -pub fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlaq_u16(a, b, vdupq_laneq_u16::(c)) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + crate::core_arch::macros::deinterleaving_load!(f16, 8, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( 
- all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmla_s32(a, b, vdup_lane_s32::(c)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + crate::core_arch::macros::deinterleaving_load!(f32, 2, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmla_u32(a, b, vdup_lane_u32::(c)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] 
+pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + crate::core_arch::macros::deinterleaving_load!(f32, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmla_s32(a, b, vdup_laneq_s32::(c)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + crate::core_arch::macros::deinterleaving_load!(i8, 8, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmla_u32(a, b, vdup_laneq_u32::(c)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + crate::core_arch::macros::deinterleaving_load!(i8, 16, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlaq_s32(a, b, vdupq_lane_s32::(c)) +#[cfg(not(target_arch = "arm"))] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + crate::core_arch::macros::deinterleaving_load!(i16, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlaq_u32(a, b, vdupq_lane_u32::(c)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + crate::core_arch::macros::deinterleaving_load!(i16, 8, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] 
#[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_s32(a, b, vdupq_laneq_s32::(c)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { + crate::core_arch::macros::deinterleaving_load!(i32, 2, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> 
uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_u32(a, b, vdupq_laneq_u32::(c)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + crate::core_arch::macros::deinterleaving_load!(i32, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vmla_f32(a, b, vdup_n_f32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")] + fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + } + _vld4_f32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"] +#[doc = "Load multiple 4-element 
structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vmlaq_f32(a, b, vdupq_n_f32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")] + fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + } + _vld4q_f32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - 
stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { - vmla_s16(a, b, vdup_n_s16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")] + fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + } + _vld4_s8(a as *const i8, 1) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { - vmlaq_s16(a, b, vdupq_n_s16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")] + fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + } + _vld4q_s8(a as *const i8, 1) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { - vmla_u16(a, b, vdup_n_u16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")] + fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + } + _vld4_s16(a as *const i8, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] 
#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { - vmlaq_u16(a, b, vdupq_n_u16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")] + fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + } + _vld4q_s16(a as *const i8, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: 
i32) -> int32x2_t { - vmla_s32(a, b, vdup_n_s32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")] + fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t; + } + _vld4_s32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { - vmlaq_s32(a, b, vdupq_n_s32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")] + fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + } + _vld4q_s32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { - vmla_u32(a, b, vdup_n_u32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")] + fn _vld4_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x4_t; + } + _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { - vmlaq_u32(a, b, vdupq_n_u32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")] + fn _vld4q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4, LANE = 0) )] -pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0" + )] + fn _vld4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x4_t; + } + _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = 
"arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4, LANE = 0) )] -pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0" + )] + fn _vld4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } 
+#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0" + )] + fn _vld4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *const i8, + ) -> float32x2x4_t; + } + _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0" + )] + fn _vld4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *const i8, + ) -> float32x4x4_t; + } + _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0" + )] + fn _vld4_lane_s8( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i64, + ptr: *const i8, + ) -> int8x8x4_t; + } + _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to 
accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } -} -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0" + )] + fn _vld4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *const i8, + ) -> int16x4x4_t; + } + _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * 
Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0" + )] + fn _vld4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *const i8, + ) -> int16x8x4_t; + } + _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0" + )] + fn _vld4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *const i8, + ) -> int32x2x4_t; + } + _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, 
assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0" + )] + fn _vld4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *const i8, + ) -> int32x4x4_t; + } + _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")] + fn _vld4_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x4_t; + } + _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
+pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")] + fn _vld4q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x4_t; + } + _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")] + fn _vld4_lane_s8( + ptr: *const i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ) -> int8x8x4_t; + } + _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")] + fn _vld4_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x4_t; + } + _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")] + fn _vld4q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x4_t; + } + _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")] + fn _vld4_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i32, + size: 
i32, + ) -> int32x2x4_t; + } + _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")] + fn _vld4q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x4_t; + } + _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26955,19 +26845,23 @@ pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: 
uint8x8_t) -> uint8x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld4_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26976,19 +26870,23 @@ pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_u16(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld4_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * 
Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26997,19 +26895,23 @@ pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld4q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27018,19 +26920,23 @@ pub fn vmla_u16(a: uint16x4_t, b: 
uint16x4_t, c: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld4_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27039,19 +26945,23 @@ pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld4q_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"] +#[doc = "Load 
multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27060,20 +26970,23 @@ pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld4_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(smlal, LANE = 1) + assert_instr(ld4, LANE = 0) )] -#[rustc_legacy_const_generics(3)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27082,21 +26995,23 @@ pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { +pub unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t { static_assert_uimm_bits!(LANE, 2); - vmlal_s16(a, b, vdup_lane_s16::(c)) + transmute(vld4_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal, LANE = 1) + assert_instr(ld4, LANE = 0) )] -#[rustc_legacy_const_generics(3)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27105,21 +27020,22 @@ pub fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { +pub unsafe fn vld4q_lane_p16(a: 
*const p16, b: poly16x8x4_t) -> poly16x8x4_t { static_assert_uimm_bits!(LANE, 3); - vmlal_s16(a, b, vdup_laneq_s16::(c)) + transmute(vld4q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal, LANE = 1) + assert_instr(nop) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27128,21 +27044,49 @@ pub fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlal_s32(a, b, vdup_lane_s32::(c)) +pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_s64(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")] + fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t; + } + _vld4_s64(a as *const i8, 8) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal, LANE = 1) + assert_instr(nop) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27151,21 +27095,21 @@ pub fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_s32(a, b, vdup_laneq_s32::(c)) +pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_s64(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27174,21 +27118,21 @@ pub fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_u16(a, b, vdup_lane_u16::(c)) +pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_s8(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27197,21 +27141,21 @@ pub fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlal_u16(a, b, vdup_laneq_u16::(c)) +pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_s8(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0") @@ -27220,21 +27164,21 @@ pub fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlal_u32(a, b, vdup_lane_u32::(c)) +pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_s16(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27243,19 +27187,20 @@ pub fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_u32(a, b, vdup_laneq_u32::(c)) +pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_s16(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27265,18 +27210,20 @@ pub fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vmlal_s16(a, b, vdup_n_s16(c)) +pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_s32(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27286,18 +27233,20 @@ pub fn vmlal_n_s16(a: int32x4_t, 
b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vmlal_s32(a, b, vdup_n_s32(c)) +pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_s32(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27307,18 +27256,20 @@ pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { - vmlal_u16(a, b, vdup_n_u16(c)) +pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_s8(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27328,18 +27279,20 @@ pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { - vmlal_u32(a, b, vdup_n_u32(c)) +pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_s8(transmute(a))) } -#[doc = "Signed multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27349,18 +27302,20 @@ pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { - unsafe { simd_add(a, vmull_s8(b, c)) } +pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_s16(transmute(a))) } -#[doc = "Signed multiply-add long"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27370,18 +27325,20 @@ pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - unsafe { simd_add(a, vmull_s16(b, c)) } +pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_s16(transmute(a))) } -#[doc = "Signed multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"] +#[doc = "Store SIMD&FP register (immediate offset)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -27391,60 +27348,78 @@ pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: 
int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - unsafe { simd_add(a, vmull_s32(b, c)) } +pub unsafe fn vldrq_p128(a: *const p128) -> p128 { + *a } -#[doc = "Unsigned multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(fmax) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { - unsafe { simd_add(a, vmull_u8(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f16" + )] + fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmax_f16(a, b) } } -#[doc = "Unsigned multiply-add long"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(fmax) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - unsafe { simd_add(a, vmull_u16(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v8f16" + )] + fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vmaxq_f16(a, b) } } -#[doc = "Unsigned multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmlal.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(fmax) )] #[cfg_attr( not(target_arch = "arm"), @@ -27454,18 +27429,26 @@ pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - unsafe { simd_add(a, vmull_u32(b, c)) } +pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v2f32" + )] + fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmax_f32(a, b) } } -#[doc = "Floating-point multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(fmax) )] #[cfg_attr( not(target_arch = "arm"), @@ -27475,18 +27458,26 @@ pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { simd_sub(a, simd_mul(b, 
c)) } +pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f32" + )] + fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vmaxq_f32(a, b) } } -#[doc = "Floating-point multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(smax) )] #[cfg_attr( not(target_arch = "arm"), @@ -27496,20 +27487,22 @@ pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let mask: int8x8_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27518,25 +27511,22 @@ pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmls_f32(a, b, vdup_lane_f32::(c)) +pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let mask: int8x16_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27545,25 +27535,22 @@ pub fn vmls_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, -) -> float32x2_t { - 
static_assert_uimm_bits!(LANE, 2); - vmls_f32(a, b, vdup_laneq_f32::(c)) +pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let mask: int16x4_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27572,25 +27559,22 @@ pub fn vmls_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x2_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlsq_f32(a, b, vdupq_lane_f32::(c)) +pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let mask: int16x8_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27599,25 +27583,22 @@ pub fn vmlsq_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_f32(a, b, vdupq_laneq_f32::(c)) +pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let mask: int32x2_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27626,21 +27607,22 @@ pub fn vmlsq_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmls_s16(a, b, vdup_lane_s16::(c)) +pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let mask: int32x4_t = 
simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27649,21 +27631,22 @@ pub fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmls_u16(a, b, vdup_lane_u16::(c)) +pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let mask: uint8x8_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27672,21 +27655,22 @@ pub fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmls_s16(a, b, vdup_laneq_s16::(c)) +pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let mask: uint8x16_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27695,21 +27679,22 @@ pub fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmls_u16(a, b, vdup_laneq_u16::(c)) +pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let mask: uint16x4_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply 
subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27718,21 +27703,22 @@ pub fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_s16(a, b, vdupq_lane_s16::(c)) +pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let mask: uint16x8_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] 
-#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27741,21 +27727,22 @@ pub fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_u16(a, b, vdupq_lane_u16::(c)) +pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let mask: uint32x2_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27764,67 +27751,82 @@ pub fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlsq_s16(a, b, vdupq_laneq_s16::(c)) +pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let mask: uint32x4_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlsq_u16(a, b, vdupq_laneq_u16::(c)) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f16" + )] + fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmaxnm_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmls_s32(a, b, vdup_lane_s32::(c)) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v8f16" + )] + fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vmaxnmq_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27833,21 +27835,27 @@ pub fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmls_u32(a, b, vdup_lane_u32::(c)) +pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v2f32" + )] + fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmaxnm_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] 
-#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27856,67 +27864,87 @@ pub fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmls_s32(a, b, vdup_laneq_s32::(c)) +pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f32" + )] + fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vmaxnmq_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_u32(a: 
uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmls_u32(a, b, vdup_laneq_u32::(c)) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f16" + )] + fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmin_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlsq_s32(a, b, vdupq_lane_s32::(c)) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vmins.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v8f16" + )] + fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vminq_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27925,21 +27953,27 @@ pub fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlsq_u32(a, b, vdupq_lane_u32::(c)) +pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v2f32" + )] + fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmin_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"] +#[doc = "Minimum 
(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27948,21 +27982,27 @@ pub fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_s32(a, b, vdupq_laneq_s32::(c)) +pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f32" + )] + fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vminq_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch 
= "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(smin) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27971,19 +28011,21 @@ pub fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_u32(a, b, vdupq_laneq_u32::(c)) +pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let mask: int8x8_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -27993,18 +28035,21 @@ pub fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vmls_f32(a, b, vdup_n_f32(c)) +pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let mask: int8x16_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"] +#[doc = 
"Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28014,18 +28059,21 @@ pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vmlsq_f32(a, b, vdupq_n_f32(c)) +pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let mask: int16x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28035,18 +28083,21 @@ pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { - vmls_s16(a, b, vdup_n_s16(c)) +pub fn 
vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let mask: int16x8_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28056,18 +28107,21 @@ pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { - vmlsq_s16(a, b, vdupq_n_s16(c)) +pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let mask: int32x2_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ 
-28077,18 +28131,21 @@ pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { - vmls_u16(a, b, vdup_n_u16(c)) +pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let mask: int32x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28098,18 +28155,21 @@ pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { - vmlsq_u16(a, b, vdupq_n_u16(c)) +pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let mask: uint8x8_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28119,18 +28179,21 @@ pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { - vmls_s32(a, b, vdup_n_s32(c)) +pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let mask: uint8x16_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28140,18 +28203,21 @@ pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { - vmlsq_s32(a, b, vdupq_n_s32(c)) +pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let mask: uint16x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28161,18 +28227,21 @@ pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { - vmls_u32(a, b, vdup_n_u32(c)) +pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let mask: uint16x8_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28182,18 +28251,21 @@ pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { - vmlsq_u32(a, b, vdupq_n_u32(c)) +pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let mask: uint32x2_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28203,60 +28275,81 @@ pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let mask: uint32x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f16" + )] + fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vminnm_f16(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] 
#[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v8f16" + )] + fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vminnmq_f16(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] #[cfg_attr( not(target_arch = "arm"), @@ -28266,18 +28359,26 @@ pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v2f32" + )] + fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vminnm_f32(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] #[cfg_attr( not(target_arch = "arm"), @@ -28287,18 +28388,26 @@ pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f32" + )] + fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vminnmq_f32(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"] +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -28308,18 +28417,18 @@ pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"] +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -28329,19 +28438,20 @@ pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn 
vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28350,19 +28460,25 @@ pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmla_f32(a, b, vdup_lane_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28371,19 +28487,25 @@ pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmla_f32(a, b, vdup_laneq_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28392,19 +28514,25 @@ pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmlaq_lane_f32( + a: float32x4_t, + b: 
float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlaq_f32(a, b, vdupq_lane_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28413,19 +28541,25 @@ pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmlaq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_f32(a, b, vdupq_laneq_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28434,18 +28568,19 @@ pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmla_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28456,19 +28591,19 @@ pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { +pub fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { 
static_assert_uimm_bits!(LANE, 2); - vmlsl_s16(a, b, vdup_lane_s16::(c)) + vmla_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28479,19 +28614,19 @@ pub fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { +pub fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 3); - vmlsl_s16(a, b, vdup_laneq_s16::(c)) + vmla_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28502,19 +28637,19 @@ pub fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlsl_s32(a, b, vdup_lane_s32::(c)) +pub fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmla_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28525,19 +28660,19 @@ pub fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { +pub fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - vmlsl_s32(a, b, vdup_laneq_s32::(c)) + vmlaq_s16(a, b, vdupq_lane_s16::(c)) } -#[doc = "Vector widening multiply subtract 
with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28548,19 +28683,19 @@ pub fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { +pub fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { static_assert_uimm_bits!(LANE, 2); - vmlsl_u16(a, b, vdup_lane_u16::(c)) + vmlaq_u16(a, b, vdupq_lane_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] 
#[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28571,19 +28706,19 @@ pub fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { +pub fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); - vmlsl_u16(a, b, vdup_laneq_u16::(c)) + vmlaq_s16(a, b, vdupq_laneq_s16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28594,19 +28729,19 @@ pub fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlsl_u32(a, b, vdup_lane_u32::(c)) +pub fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlaq_u16(a, b, vdupq_laneq_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"] 
+#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -28617,20 +28752,21 @@ pub fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_u32(a, b, vdup_laneq_u32::(c)) +pub fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmla_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ 
-28639,19 +28775,21 @@ pub fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vmlsl_s16(a, b, vdup_n_s16(c)) +pub fn vmla_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmla_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28660,19 +28798,21 @@ pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vmlsl_s32(a, b, vdup_n_s32(c)) +pub fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmla_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28681,19 +28821,21 @@ pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { - vmlsl_u16(a, b, vdup_n_u16(c)) +pub fn vmla_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmla_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28702,19 +28844,21 @@ pub fn vmlsl_n_u16(a: 
uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { - vmlsl_u32(a, b, vdup_n_u32(c)) +pub fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlaq_s32(a, b, vdupq_lane_s32::(c)) } -#[doc = "Signed multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28723,19 +28867,21 @@ pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { - unsafe { simd_sub(a, vmull_s8(b, c)) } +pub fn vmlaq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlaq_u32(a, b, vdupq_lane_u32::(c)) } -#[doc = "Signed multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28744,19 +28890,21 @@ pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - unsafe { simd_sub(a, vmull_s16(b, c)) } +pub fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_s32(a, b, vdupq_laneq_s32::(c)) } -#[doc = "Signed multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28765,18 +28913,19 @@ pub fn vmlsl_s16(a: int32x4_t, b: 
int16x4_t, c: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - unsafe { simd_sub(a, vmull_s32(b, c)) } +pub fn vmlaq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_u32(a, b, vdupq_laneq_u32::(c)) } -#[doc = "Unsigned multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -28786,18 +28935,18 @@ pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { - unsafe { simd_sub(a, vmull_u8(b, c)) } +pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmla_f32(a, b, vdup_n_f32(c)) } -#[doc = "Unsigned multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -28807,18 +28956,18 @@ pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - unsafe { simd_sub(a, vmull_u16(b, c)) } +pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlaq_f32(a, b, vdupq_n_f32(c)) } -#[doc = "Unsigned multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -28828,108 +28977,60 @@ pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - unsafe { simd_sub(a, vmull_u32(b, c)) } +pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmla_s16(a, b, vdup_n_s16(c)) } -#[doc = "8-bit integer matrix multiply-accumulate"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smmla) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")] - fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; - } - unsafe { _vmmlaq_s32(a, b, c) } +pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlaq_s16(a, b, vdupq_n_s16(c)) } -#[doc = "8-bit integer matrix multiply-accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ummla) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")] - fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; - } - unsafe { _vmmlaq_u32(a, b, c) } -} -#[doc = "Duplicate element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) -)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmov_n_f16(a: f16) -> float16x4_t { - vdup_n_f16(a) -} -#[doc = "Duplicate element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) -)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmovq_n_f16(a: f16) -> float16x8_t { - vdupq_n_f16(a) +pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmla_u16(a, b, vdup_n_u16(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -28939,18 +29040,18 @@ pub fn vmovq_n_f16(a: f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_f32(value: f32) -> float32x2_t { - vdup_n_f32(value) +pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlaq_u16(a, b, vdupq_n_u16(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -28960,18 +29061,18 @@ pub fn vmov_n_f32(value: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_p16(value: p16) -> poly16x4_t { - vdup_n_p16(value) +pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmla_s32(a, b, vdup_n_s32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -28981,18 +29082,18 @@ pub fn vmov_n_p16(value: p16) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_p8(value: p8) -> poly8x8_t { - vdup_n_p8(value) +pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlaq_s32(a, b, vdupq_n_s32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29002,18 +29103,18 @@ pub fn vmov_n_p8(value: p8) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s16(value: i16) -> int16x4_t { - vdup_n_s16(value) +pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmla_u32(a, b, vdup_n_u32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29023,18 +29124,18 @@ pub fn vmov_n_s16(value: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s32(value: i32) -> int32x2_t { - vdup_n_s32(value) +pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlaq_u32(a, b, vdupq_n_u32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29044,18 +29145,18 @@ pub fn vmov_n_s32(value: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s64(value: i64) -> int64x1_t { - vdup_n_s64(value) +pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29065,18 +29166,18 @@ pub fn vmov_n_s64(value: i64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s8(value: i8) -> int8x8_t { - vdup_n_s8(value) +pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: 
int8x16_t) -> int8x16_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29086,18 +29187,18 @@ pub fn vmov_n_s8(value: i8) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u16(value: u16) -> uint16x4_t { - vdup_n_u16(value) +pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29107,18 +29208,18 @@ pub fn vmov_n_u16(value: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u32(value: u32) -> uint32x2_t { - vdup_n_u32(value) +pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29128,18 +29229,18 @@ pub fn vmov_n_u32(value: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u64(value: u64) -> uint64x1_t { - vdup_n_u64(value) +pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + 
assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29149,18 +29250,18 @@ pub fn vmov_n_u64(value: u64) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u8(value: u8) -> uint8x8_t { - vdup_n_u8(value) +pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29170,18 +29271,18 @@ pub fn vmov_n_u8(value: u8) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_f32(value: f32) -> float32x4_t { - vdupq_n_f32(value) +pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29191,18 +29292,18 @@ pub fn vmovq_n_f32(value: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_p16(value: p16) -> poly16x8_t { - vdupq_n_p16(value) +pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29212,18 +29313,18 @@ pub fn vmovq_n_p16(value: p16) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_p8(value: p8) -> poly8x16_t { - vdupq_n_p8(value) +pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"] 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29233,18 +29334,18 @@ pub fn vmovq_n_p8(value: p8) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s16(value: i16) -> int16x8_t { - vdupq_n_s16(value) +pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29254,18 +29355,18 @@ pub fn vmovq_n_s16(value: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s32(value: i32) -> int32x4_t { - vdupq_n_s32(value) +pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29275,19 +29376,20 @@ pub fn vmovq_n_s32(value: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s64(value: i64) -> int64x2_t { - vdupq_n_s64(value) +pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29296,19 +29398,21 @@ pub fn vmovq_n_s64(value: i64) -> int64x2_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s8(value: i8) -> int8x16_t { - vdupq_n_s8(value) +pub fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29317,19 +29421,21 @@ pub fn vmovq_n_s8(value: i8) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u16(value: u16) -> uint16x8_t { - vdupq_n_u16(value) +pub fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlal_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29338,19 +29444,21 @@ pub fn vmovq_n_u16(value: u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u32(value: u32) -> uint32x4_t { - vdupq_n_u32(value) +pub fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlal_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29359,19 +29467,21 @@ pub fn vmovq_n_u32(value: u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u64(value: u64) -> uint64x2_t { - vdupq_n_u64(value) +pub fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + 
static_assert_uimm_bits!(LANE, 2); + vmlal_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29380,19 +29490,21 @@ pub fn vmovq_n_u64(value: u64) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u8(value: u8) -> uint8x16_t { - vdupq_n_u8(value) +pub fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(sxtl) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29401,19 +29513,21 @@ pub fn vmovq_n_u8(value: u8) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_s16(a: int16x4_t) -> int32x4_t { - unsafe { simd_cast(a) } +pub fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlal_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sxtl) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29422,19 +29536,21 @@ pub fn vmovl_s16(a: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_s32(a: int32x2_t) -> int64x2_t { - unsafe { simd_cast(a) } +pub fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlal_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"] +#[doc = "Vector widening multiply 
accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sxtl) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29443,18 +29559,19 @@ pub fn vmovl_s32(a: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_s8(a: int8x8_t) -> int16x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uxtl) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29464,18 +29581,18 @@ pub fn vmovl_s8(a: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { - 
unsafe { simd_cast(a) } +pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlal_s16(a, b, vdup_n_s16(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uxtl) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29485,18 +29602,18 @@ pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { - unsafe { simd_cast(a) } +pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlal_s32(a, b, vdup_n_s32(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uxtl) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29506,18 +29623,18 @@ pub fn vmovl_u32(a: uint32x2_t) -> 
uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlal_u16(a, b, vdup_n_u16(c)) } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29527,18 +29644,18 @@ pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_s16(a: int16x8_t) -> int8x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlal_u32(a, b, vdup_n_u32(c)) } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"] +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(xtn) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29548,18 +29665,18 @@ pub fn vmovn_s16(a: int16x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_s32(a: int32x4_t) -> int16x4_t { - unsafe { simd_cast(a) } +pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + unsafe { simd_add(a, vmull_s8(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"] +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29569,18 +29686,18 @@ pub fn vmovn_s32(a: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_s64(a: int64x2_t) -> int32x2_t { - unsafe { simd_cast(a) } +pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + unsafe { simd_add(a, vmull_s16(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"] +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vmlal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29590,18 +29707,18 @@ pub fn vmovn_s64(a: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + unsafe { simd_add(a, vmull_s32(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"] +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29611,18 +29728,18 @@ pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { - unsafe { simd_cast(a) } +pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + unsafe { simd_add(a, vmull_u8(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"] +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -29632,59 +29749,57 @@ pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { - unsafe { simd_cast(a) } +pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + unsafe { simd_add(a, vmull_u16(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"] +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(umlal) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + unsafe { simd_add(a, 
vmull_u32(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"] +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_mul(a, b) } +pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul) @@ -29697,19 +29812,20 @@ pub fn 
vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29718,68 +29834,79 @@ pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_mul(a, b) } +pub fn vmls_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmls_f32(a, b, vdup_lane_f32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"] #[inline] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmul_lane_f16(a: float16x4_t, v: float16x4_t) -> float16x4_t { +pub fn vmls_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdup_lane_f16::(v)) } + vmls_f32(a, b, vdup_laneq_f32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - 
stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float16x8_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdupq_lane_f16::(v)) } +pub fn vmlsq_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_f32(a, b, vdupq_lane_f32::(c)) } -#[doc = "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 0) + assert_instr(fmul, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29788,21 +29915,25 @@ pub fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float1 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, vdup_lane_f32::(b)) } +pub fn vmlsq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_f32(a, b, vdupq_laneq_f32::(c)) } -#[doc 
= "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 0) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29811,21 +29942,21 @@ pub fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { +pub fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdup_laneq_f32::(b)) } + vmls_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(fmul, LANE = 0) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29834,21 +29965,21 @@ pub fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float3 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, vdupq_lane_f32::(b)) } +pub fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmls_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 0) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29857,21 +29988,21 @@ pub fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float3 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdupq_laneq_f32::(b)) } +pub fn vmls_laneq_s16(a: int16x4_t, b: 
int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmls_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29880,21 +30011,21 @@ pub fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdup_lane_s16::(b)) } +pub fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmls_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29903,21 +30034,21 @@ pub fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { +pub fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdupq_lane_s16::(b)) } + vmlsq_s16(a, b, vdupq_lane_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29926,21 +30057,21 @@ pub fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, 
vdup_lane_s32::(b)) } +pub fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_u16(a, b, vdupq_lane_u16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29949,21 +30080,21 @@ pub fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, vdupq_lane_s32::(b)) } +pub fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlsq_s16(a, b, vdupq_laneq_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29972,21 +30103,21 @@ pub fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdup_lane_u16::(b)) } +pub fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlsq_u16(a, b, vdupq_laneq_u16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29995,21 +30126,21 @@ pub fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdupq_lane_u16::(b)) } +pub fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmls_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30018,21 +30149,21 @@ pub fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +pub fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, vdup_lane_u32::(b)) } + vmls_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30041,21 +30172,21 @@ pub fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, vdupq_lane_u32::(b)) } +pub fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmls_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30064,21 +30195,21 @@ pub fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4 target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_mul(a, vdup_laneq_s16::(b)) } +pub fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmls_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30087,21 +30218,21 @@ pub fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_mul(a, vdupq_laneq_s16::(b)) } +pub fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_s32(a, b, vdupq_lane_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30110,21 +30241,21 @@ pub fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdup_laneq_s32::(b)) } +pub fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_u32(a, b, vdupq_lane_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature 
= "neon_intrinsics", since = "1.59.0") @@ -30133,21 +30264,21 @@ pub fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +pub fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdupq_laneq_s32::(b)) } + vmlsq_s32(a, b, vdupq_laneq_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30156,21 +30287,20 @@ pub fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_mul(a, vdup_laneq_u16::(b)) } +pub fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_u32(a, b, vdupq_laneq_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(fmul) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30179,21 +30309,19 @@ pub fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_mul(a, vdupq_laneq_u16::(b)) } +pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmls_f32(a, b, vdup_n_f32(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(fmul) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30202,21 +30330,19 @@ pub fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdup_laneq_u32::(b)) } +pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlsq_f32(a, b, vdupq_n_f32(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30225,49 +30351,18 @@ pub fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, vdupq_laneq_u32::(b)) } -} -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr( - all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t { - unsafe { simd_mul(a, vdup_n_f16(b)) } -} -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { - unsafe { simd_mul(a, vdupq_n_f16(b)) } +pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmls_s16(a, b, vdup_n_s16(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30277,18 +30372,18 @@ pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_f32(a: float32x2_t, b: f32) -> 
float32x2_t { - unsafe { simd_mul(a, vdup_n_f32(b)) } +pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlsq_s16(a, b, vdupq_n_s16(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30298,18 +30393,18 @@ pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { - unsafe { simd_mul(a, vdupq_n_f32(b)) } +pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmls_u16(a, b, vdup_n_u16(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ 
-30319,18 +30414,18 @@ pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - unsafe { simd_mul(a, vdup_n_s16(b)) } +pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlsq_u16(a, b, vdupq_n_u16(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30340,18 +30435,18 @@ pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - unsafe { simd_mul(a, vdupq_n_s16(b)) } +pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmls_s32(a, b, vdup_n_s32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30361,18 +30456,18 @@ pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - unsafe { simd_mul(a, vdup_n_s32(b)) } +pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlsq_s32(a, b, vdupq_n_s32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30382,18 +30477,18 @@ pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - unsafe { simd_mul(a, vdupq_n_s32(b)) } +pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmls_u32(a, b, vdup_n_u32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30403,18 +30498,18 @@ pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { - unsafe { simd_mul(a, vdup_n_u16(b)) } +pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlsq_u32(a, b, vdupq_n_u32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30424,18 +30519,18 @@ pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { - unsafe { simd_mul(a, vdupq_n_u16(b)) } +pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30445,18 +30540,18 @@ pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { - unsafe { simd_mul(a, vdup_n_u32(b)) } +pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30466,18 +30561,18 @@ pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { 
- unsafe { simd_mul(a, vdupq_n_u32(b)) } +pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Polynomial multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(pmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30487,26 +30582,18 @@ pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.pmul.v8i8" - )] - fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - } - unsafe { _vmul_p8(a, b) } +pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Polynomial multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(pmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30516,26 +30603,18 @@ pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.pmul.v16i8" - )] - fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - } - unsafe { _vmulq_p8(a, b) } +pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30545,18 +30624,18 @@ pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe { simd_sub(a, 
simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30566,18 +30645,18 @@ pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_mul(a, b) } +pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30587,18 +30666,18 @@ pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30608,18 +30687,18 @@ pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_mul(a, b) } +pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = 
"arm"), @@ -30629,18 +30708,18 @@ pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30650,18 +30729,18 @@ pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_mul(a, b) } +pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -30671,19 +30750,20 @@ pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30692,19 +30772,21 @@ pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"] +#[doc = "Vector 
widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30713,19 +30795,21 @@ pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlsl_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30734,19 +30818,21 @@ pub fn vmul_s8(a: int8x8_t, b: 
int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlsl_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30755,19 +30841,21 @@ pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(umlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30776,20 +30864,21 @@ pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(umlsl, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30798,21 +30887,21 @@ pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_s16(a: 
int16x4_t, b: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmull_s16(a, vdup_lane_s16::(b)) +pub fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlsl_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(umlsl, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30821,21 +30910,21 @@ pub fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmull_s16(a, vdup_laneq_s16::(b)) +pub fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlsl_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(umlsl, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30844,21 +30933,20 @@ pub fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmull_s32(a, vdup_lane_s32::(b)) +pub fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", 
since = "1.59.0") @@ -30867,21 +30955,19 @@ pub fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmull_s32(a, vdup_laneq_s32::(b)) +pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlsl_s16(a, b, vdup_n_s16(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30890,21 +30976,19 @@ pub fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmull_u16(a, vdup_lane_u16::(b)) +pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlsl_s32(a, b, vdup_n_s32(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(umlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30913,21 +30997,19 @@ pub fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmull_u16(a, vdup_laneq_u16::(b)) +pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlsl_u16(a, b, vdup_n_u16(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(umlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30936,21 +31018,19 @@ pub fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmull_u32(a, vdup_lane_u32::(b)) +pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlsl_u32(a, b, vdup_n_u32(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"] +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30959,19 +31039,18 @@ pub fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmull_u32(a, vdup_laneq_u32::(b)) +pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + unsafe { simd_sub(a, vmull_s8(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"] +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(smlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -30981,18 +31060,18 @@ pub fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { - vmull_s16(a, vdup_n_s16(b)) +pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + unsafe { simd_sub(a, vmull_s16(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"] +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(smlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31002,18 +31081,18 @@ pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { - vmull_s32(a, vdup_n_s32(b)) +pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + unsafe { simd_sub(a, vmull_s32(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"] 
+#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(umlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31023,18 +31102,18 @@ pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { - vmull_u16(a, vdup_n_u16(b)) +pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + unsafe { simd_sub(a, vmull_u8(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"] +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(umlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31044,18 +31123,18 @@ pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { - vmull_u32(a, vdup_n_u32(b)) +pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: 
uint16x4_t) -> uint32x4_t { + unsafe { simd_sub(a, vmull_u16(b, c)) } } -#[doc = "Polynomial multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"] +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(pmull) + assert_instr(umlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31065,47 +31144,108 @@ pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { +pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + unsafe { simd_sub(a, vmull_u32(b, c)) } +} +#[doc = "8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - 
link_name = "llvm.aarch64.neon.pmull.v8i16" + link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")] - fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")] + fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; } - unsafe { _vmull_p8(a, b) } + unsafe { _vmmlaq_s32(a, b, c) } } -#[doc = "Signed multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"] +#[doc = "8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(ummla) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")] + fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: 
uint8x16_t) -> uint32x4_t; + } + unsafe { _vmmlaq_u32(a, b, c) } } -#[doc = "Signed multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"] +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(dup) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmov_n_f16(a: f16) -> float16x4_t { + vdup_n_f16(a) +} +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmovq_n_f16(a: f16) -> float16x8_t { + vdupq_n_f16(a) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31115,18 +31255,18 @@ pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_f32(value: f32) -> float32x2_t { + vdup_n_f32(value) } -#[doc = "Signed multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31136,18 +31276,18 @@ pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_p16(value: p16) -> poly16x4_t { + vdup_n_p16(value) } -#[doc = "Unsigned multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable 
= "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31157,18 +31297,18 @@ pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_p8(value: p8) -> poly8x8_t { + vdup_n_p8(value) } -#[doc = "Unsigned multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31178,18 +31318,18 @@ pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_s16(value: i16) -> int16x4_t { + vdup_n_s16(value) } -#[doc = "Unsigned multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31199,18 +31339,18 @@ pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_s32(value: i32) -> int32x2_t { + vdup_n_s32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(fmov) )] #[cfg_attr( not(target_arch = "arm"), @@ -31220,19 +31360,18 @@ pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { - let b = poly8x8_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmov_n_s64(value: i64) -> int64x1_t { + vdup_n_s64(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31242,19 +31381,18 @@ pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_s16(a: int16x4_t) -> int16x4_t { - let b = int16x4_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmov_n_s8(value: i8) -> int8x8_t { + vdup_n_s8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31264,19 +31402,18 @@ pub fn vmvn_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_s32(a: int32x2_t) -> int32x2_t { - let b = int32x2_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn 
vmov_n_u16(value: u16) -> uint16x4_t { + vdup_n_u16(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31286,19 +31423,18 @@ pub fn vmvn_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_s8(a: int8x8_t) -> int8x8_t { - let b = int8x8_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmov_n_u32(value: u32) -> uint32x2_t { + vdup_n_u32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(fmov) )] #[cfg_attr( not(target_arch = "arm"), @@ -31308,19 +31444,18 @@ pub fn vmvn_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vmvn_u16(a: uint16x4_t) -> uint16x4_t { - let b = uint16x4_t::splat(65_535); - unsafe { simd_xor(a, b) } +pub fn vmov_n_u64(value: u64) -> uint64x1_t { + vdup_n_u64(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31330,19 +31465,18 @@ pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { - let b = uint32x2_t::splat(4_294_967_295); - unsafe { simd_xor(a, b) } +pub fn vmov_n_u8(value: u8) -> uint8x8_t { + vdup_n_u8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31352,19 +31486,18 @@ pub fn 
vmvn_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { - let b = uint8x8_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_f32(value: f32) -> float32x4_t { + vdupq_n_f32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31374,19 +31507,18 @@ pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { - let b = poly8x16_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_p16(value: p16) -> poly16x8_t { + vdupq_n_p16(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31396,19 +31528,18 @@ pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t { - let b = int16x8_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_p8(value: p8) -> poly8x16_t { + vdupq_n_p8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31418,19 +31549,18 @@ pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t { - let b = int32x4_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s16(value: i16) -> int16x8_t { + vdupq_n_s16(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31440,19 +31570,18 @@ pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t { - let b = int8x16_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s32(value: i32) -> int32x4_t { + vdupq_n_s32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31462,19 +31591,18 @@ pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { - let b = uint16x8_t::splat(65_535); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s64(value: i64) -> int64x2_t { + vdupq_n_s64(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31484,19 +31612,18 @@ pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { - let b = uint32x4_t::splat(4_294_967_295); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s8(value: i8) -> int8x16_t { + vdupq_n_s8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31506,63 +31633,60 @@ pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { - let b = uint8x16_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_u16(value: u16) -> uint16x8_t { + vdupq_n_u16(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(dup) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vneg_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_neg(a) } +pub fn vmovq_n_u32(value: u32) -> uint32x4_t { + vdupq_n_u32(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(dup) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vnegq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_neg(a) } +pub fn vmovq_n_u64(value: u64) -> uint64x2_t { + vdupq_n_u64(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31572,18 +31696,18 @@ pub fn vnegq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_neg(a) } +pub fn vmovq_n_u8(value: u8) -> uint8x16_t { + vdupq_n_u8(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(sxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31593,18 +31717,18 @@ pub fn 
vneg_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_neg(a) } +pub fn vmovl_s16(a: int16x4_t) -> int32x4_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(sxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31614,18 +31738,18 @@ pub fn vnegq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_neg(a) } +pub fn vmovl_s32(a: int32x2_t) -> int64x2_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(sxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31635,18 +31759,18 @@ pub fn vneg_s8(a: 
int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_neg(a) } +pub fn vmovl_s8(a: int8x8_t) -> int16x8_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(uxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31656,18 +31780,18 @@ pub fn vnegq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_s16(a: int16x4_t) -> int16x4_t { - unsafe { simd_neg(a) } +pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(uxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31677,18 +31801,18 @@ pub fn vneg_s16(a: int16x4_t) -> 
int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_s16(a: int16x8_t) -> int16x8_t { - unsafe { simd_neg(a) } +pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(uxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31698,18 +31822,18 @@ pub fn vnegq_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_s32(a: int32x2_t) -> int32x2_t { - unsafe { simd_neg(a) } +pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -31719,18 +31843,18 @@ pub fn vneg_s32(a: int32x2_t) -> int32x2_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_s32(a: int32x4_t) -> int32x4_t { - unsafe { simd_neg(a) } +pub fn vmovn_s16(a: int16x8_t) -> int8x8_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -31740,19 +31864,18 @@ pub fn vnegq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - let c = int16x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_s32(a: int32x4_t) -> int16x4_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( 
not(target_arch = "arm"), @@ -31762,19 +31885,18 @@ pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - let c = int32x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_s64(a: int64x2_t) -> int32x2_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -31784,19 +31906,18 @@ pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - let c = int64x1_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -31806,19 +31927,18 @@ pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - let c = int8x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -31828,63 +31948,62 @@ pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - let c = int16x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"] #[inline] -#[target_feature(enable = 
"neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - let c = int32x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - let c = int64x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -31894,19 +32013,18 @@ pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - let c = int8x16_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + 
assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -31916,64 +32034,68 @@ pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - let c = int16x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - let c = int32x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmul_lane_f16(a: float16x4_t, v: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_lane_f16::(v)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - let c = int64x1_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_lane_f16::(v)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31982,20 +32104,21 @@ pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - let c = int8x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdup_lane_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32004,20 +32127,21 @@ pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - let c = int16x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_laneq_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] 
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32026,20 +32150,21 @@ pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - let c = int32x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdupq_lane_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( 
not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32048,20 +32173,21 @@ pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - let c = int64x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_laneq_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32070,20 +32196,21 @@ pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - let c = int8x16_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_lane_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32092,19 +32219,21 @@ pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_lane_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32113,19 +32242,21 @@ pub fn 
vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_or(a, b) } +pub fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdup_lane_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32134,19 +32265,21 @@ pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdupq_lane_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32155,19 +32288,21 @@ pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_or(a, b) } +pub fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_lane_u16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32176,19 +32311,21 @@ pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { + 
static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_lane_u16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32197,19 +32334,21 @@ pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_or(a, b) } +pub fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdup_lane_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] 
+#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32218,19 +32357,21 @@ pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdupq_lane_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32239,19 +32380,21 @@ pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdup_laneq_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32260,19 +32403,21 @@ pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdupq_laneq_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32281,19 +32426,21 @@ pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_laneq_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32302,19 +32449,21 @@ pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_laneq_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32323,19 +32472,21 @@ pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdup_laneq_u16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32344,19 +32495,21 @@ pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdupq_laneq_u16::(b)) } } -#[doc = "Vector bitwise 
or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32365,19 +32518,21 @@ pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_laneq_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0") @@ -32386,18 +32541,49 @@ pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_laneq_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t { + unsafe { simd_mul(a, vdup_n_f16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { + unsafe { simd_mul(a, vdupq_n_f16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32407,18 +32593,18 @@ pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_or(a, b) } +pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { + unsafe { simd_mul(a, vdup_n_f32(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32428,27 +32614,18 @@ pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { - let x: int16x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_s8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { 
- x = simd_add(vpaddl_s8(b), a); - }; - x +pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { + unsafe { simd_mul(a, vdupq_n_f32(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32458,27 +32635,18 @@ pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - let x: int16x8_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_s8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_s8(b), a); - }; - x +pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + unsafe { simd_mul(a, vdup_n_s16(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32488,27 +32656,18 @@ pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { - let x: int32x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_s16(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_s16(b), a); - }; - x +pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + unsafe { simd_mul(a, vdupq_n_s16(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32518,27 +32677,18 @@ pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - let x: int32x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_s16(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_s16(b), a); - }; - x +pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + unsafe { simd_mul(a, vdup_n_s32(b)) } } -#[doc = "Signed Add and 
Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32548,27 +32698,18 @@ pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { - let x: int64x1_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_s32(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_s32(b), a); - }; - x +pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + unsafe { simd_mul(a, vdupq_n_s32(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32578,27 
+32719,18 @@ pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - let x: int64x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_s32(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_s32(b), a); - }; - x +pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { + unsafe { simd_mul(a, vdup_n_u16(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32608,27 +32740,18 @@ pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { - let x: uint16x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_u8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_u8(b), a); - }; - x +pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { + unsafe { simd_mul(a, vdupq_n_u16(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"] +#[doc = "Vector 
multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32638,27 +32761,18 @@ pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - let x: uint16x8_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_u8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_u8(b), a); - }; - x +pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { + unsafe { simd_mul(a, vdup_n_u32(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32668,27 +32782,18 @@ pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { - let x: uint32x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_u16(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_u16(b), a); - }; - x +pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { + unsafe { simd_mul(a, vdupq_n_u32(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"] +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(pmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32698,27 +32803,26 @@ pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - let x: uint32x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_u16(a, b); +pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v8i8" + )] + fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_u16(b), a); - }; - x + unsafe { _vmul_p8(a, b) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"] +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(pmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32728,27 +32832,26 @@ pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { - let x: uint64x1_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_u32(a, b); +pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v16i8" + )] + fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_u32(b), a); - }; - x + unsafe { _vmulq_p8(a, b) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32758,57 +32861,39 @@ pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - let x: uint64x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_u32(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_u32(b), a); - }; - x +pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Floating-point add pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(faddp) + assert_instr(mul) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] - 
#[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.faddp.v4f16" - )] - fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; - } - unsafe { _vpadd_f16(a, b) } +pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Floating-point add pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(faddp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32818,26 +32903,18 @@ pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.faddp.v2f32" - )] - fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vpadd_f32(a, b) } +pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32847,26 +32924,18 @@ pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] - fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vpadd_s8(a, b) } +pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32876,26 +32945,18 @@ pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", 
target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] - fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vpadd_s16(a, b) } +pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32905,26 +32966,18 @@ pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] - fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vpadd_s32(a, b) } +pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32934,18 +32987,18 @@ pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) } +pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32955,18 +33008,18 @@ pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) } +pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] +#[doc = "Multiply"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32976,18 +33029,18 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) } +pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32997,26 +33050,18 @@ pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" - )] - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")] - fn _vpaddl_s8(a: int8x8_t) -> int16x4_t; - } - unsafe { _vpaddl_s8(a) } +pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33026,26 +33071,18 @@ pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] - fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t; - } - unsafe { _vpaddlq_s8(a) } +pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33055,27 +33092,20 @@ pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] - fn _vpaddl_s16(a: int16x4_t) -> int32x2_t; - } - unsafe { _vpaddl_s16(a) } +pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33084,27 +33114,21 @@ pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { - 
unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] - fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t; - } - unsafe { _vpaddlq_s16(a) } +pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmull_s16(a, vdup_lane_s16::<LANE>(b)) } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33113,27 +33137,21 @@ pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] - fn _vpaddl_s32(a: int32x2_t) -> int64x1_t; - } - unsafe { _vpaddl_s32(a) } +pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmull_s16(a, vdup_laneq_s16::<LANE>(b)) } -#[doc =
"Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33142,27 +33160,21 @@ pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] - fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t; - } - unsafe { _vpaddlq_s32(a) } +pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmull_s32(a, vdup_lane_s32::<LANE>(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch =
"arm"), assert_instr("vpaddl.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33171,27 +33183,21 @@ pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] - fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t; - } - unsafe { _vpaddl_u8(a) } +pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmull_s32(a, vdup_laneq_s32::<LANE>(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33200,27 +33206,21 @@ pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { target_arch = "arm",
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] - fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t; - } - unsafe { _vpaddlq_u8(a) } +pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmull_u16(a, vdup_lane_u16::<LANE>(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33229,27 +33229,21 @@ pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] - fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t; - } - unsafe { _vpaddl_u16(a) } +pub fn
vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmull_u16(a, vdup_laneq_u16::<LANE>(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33258,27 +33252,21 @@ pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] - fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t; - } - unsafe { _vpaddlq_u16(a) } +pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmull_u32(a, vdup_lane_u32::<LANE>(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33287,26 +33275,19 @@ pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] - fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t; - } - unsafe { _vpaddl_u32(a) } +pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmull_u32(a, vdup_laneq_u32::<LANE>(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) +
assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33316,26 +33297,18 @@ pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] - fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t; - } - unsafe { _vpaddlq_u32(a) } +pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vmull_s16(a, vdup_n_s16(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33345,26 +33318,18 @@ pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxp.v2f32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] - fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vpmax_f32(a, b) } +pub fn 
vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vmull_s32(a, vdup_n_s32(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smaxp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33374,26 +33339,18 @@ pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smaxp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] - fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vpmax_s8(a, b) } +pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { + vmull_u16(a, vdup_n_u16(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smaxp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33403,26 +33360,18 @@ pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smaxp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] - fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vpmax_s16(a, b) } +pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { + vmull_u32(a, vdup_n_u32(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"] +#[doc = "Polynomial multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smaxp) + assert_instr(pmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33432,26 +33381,26 @@ pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smaxp.v2i32" + 
link_name = "llvm.aarch64.neon.pmull.v8i16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] - fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")] + fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; } - unsafe { _vpmax_s32(a, b) } + unsafe { _vmull_p8(a, b) } } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"] +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33461,26 +33410,18 @@ pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.umaxp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] - fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vpmax_u8(a, b) } +pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"] +#[doc = "Signed multiply long"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33490,26 +33431,18 @@ pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.umaxp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] - fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vpmax_u16(a, b) } +pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"] +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33519,26 +33452,18 @@ pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch 
= "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.umaxp.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] - fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vpmax_u32(a, b) } +pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"] +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33548,26 +33473,18 @@ pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminp.v2f32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] - fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vpmin_f32(a, b) } +pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = 
"Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"] +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sminp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33577,26 +33494,18 @@ pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] - fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vpmin_s8(a, b) } +pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"] +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(sminp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -33606,26 +33515,18 @@ pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] - fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vpmin_s16(a, b) } +pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33635,26 +33536,19 @@ pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] - fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> 
int32x2_t; - } - unsafe { _vpmin_s32(a, b) } +pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { + let b = poly8x8_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33664,26 +33558,19 @@ pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] - fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vpmin_u8(a, b) } +pub fn vmvn_s16(a: int16x4_t) -> int16x4_t { + let b = int16x4_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33693,26 +33580,19 @@ pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] - fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vpmin_u16(a, b) } +pub fn vmvn_s32(a: int32x2_t) -> int32x2_t { + let b = int32x2_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33722,26 +33602,19 @@ pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v2i32" - 
)] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] - fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vpmin_u32(a, b) } +pub fn vmvn_s8(a: int8x8_t) -> int8x8_t { + let b = int8x8_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33751,26 +33624,19 @@ pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabs_s8(a: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] - fn _vqabs_s8(a: int8x8_t) -> int8x8_t; - } - unsafe { _vqabs_s8(a) } +pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { + let b = uint16x4_t::splat(65_535); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33780,26 +33646,19 @@ pub fn vqabs_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] - fn _vqabsq_s8(a: int8x16_t) -> int8x16_t; - } - unsafe { _vqabsq_s8(a) } +pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { + let b = uint32x2_t::splat(4_294_967_295); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33809,26 +33668,19 @@ pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabs_s16(a: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - 
link_name = "llvm.aarch64.neon.sqabs.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] - fn _vqabs_s16(a: int16x4_t) -> int16x4_t; - } - unsafe { _vqabs_s16(a) } +pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { + let b = uint8x8_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33838,26 +33690,19 @@ pub fn vqabs_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")] - fn _vqabsq_s16(a: int16x8_t) -> int16x8_t; - } - unsafe { _vqabsq_s16(a) } +pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { + let b = poly8x16_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33867,26 +33712,19 @@ pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabs_s32(a: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] - fn _vqabs_s32(a: int32x2_t) -> int32x2_t; - } - unsafe { _vqabs_s32(a) } +pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t { + let b = int16x8_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33896,26 +33734,19 @@ pub fn vqabs_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = 
"arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] - fn _vqabsq_s32(a: int32x4_t) -> int32x4_t; - } - unsafe { _vqabsq_s32(a) } +pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t { + let b = int32x4_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33925,18 +33756,19 @@ pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t { + let b = int8x16_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33946,18 +33778,19 @@ pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { + let b = uint16x8_t::splat(65_535); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33967,18 +33800,19 @@ pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { + let b = uint32x4_t::splat(4_294_967_295); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -33988,60 +33822,63 @@ pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { + let b = uint8x16_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_saturating_add(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vneg_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_saturating_add(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vnegq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34051,18 +33888,18 @@ pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature 
= "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34072,18 +33909,18 @@ pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34093,18 +33930,18 @@ pub fn 
vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34114,18 +33951,18 @@ pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] 
#[cfg_attr( not(target_arch = "arm"), @@ -34135,18 +33972,18 @@ pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34156,18 +33993,18 @@ pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] #[cfg_attr( all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34177,18 +34014,18 @@ pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_s32(a: int32x2_t) -> int32x2_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34198,18 +34035,18 @@ pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_s32(a: int32x4_t) -> int32x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34219,18 +34056,19 @@ pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_saturating_add(a, b) } +pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34240,24 +34078,20 @@ pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_saturating_add(a, b) } +pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"] 
+#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sqdmlal, N = 2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34266,25 +34100,20 @@ pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - vqaddq_s32(a, vqdmull_lane_s16::(b, c)) +pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sqdmlal, N = 1) + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34293,19 +34122,19 @@ pub fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - vqaddq_s64(a, vqdmull_lane_s32::(b, c)) +pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + let c = int8x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34315,18 +34144,19 @@ pub fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vqaddq_s32(a, vqdmull_n_s16(b, c)) +pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34336,18 +34166,19 @@ pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vqaddq_s64(a, vqdmull_n_s32(b, c)) +pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Signed saturating doubling multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34357,18 +34188,19 @@ pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") )] -pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - vqaddq_s32(a, vqdmull_s16(b, c)) +pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + let c = int64x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Signed saturating doubling multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34378,24 +34210,20 @@ pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - vqaddq_s64(a, vqdmull_s32(b, c)) +pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sqdmlsl, N = 2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34404,25 +34232,20 @@ pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - vqsubq_s32(a, vqdmull_lane_s16::(b, c)) +pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sqdmlsl, N = 1) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34431,19 +34254,19 @@ pub fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - vqsubq_s64(a, vqdmull_lane_s32::(b, c)) +pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34453,18 +34276,19 @@ pub fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vqsubq_s32(a, vqdmull_n_s16(b, c)) -} -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"] +pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34474,18 +34298,19 @@ pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vqsubq_s64(a, vqdmull_n_s32(b, c)) +pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c = int8x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Signed saturating doubling multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34495,18 +34320,19 @@ pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - vqsubq_s32(a, vqdmull_s16(b, c)) +pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc 
= "Signed saturating doubling multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34516,20 +34342,20 @@ pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - vqsubq_s64(a, vqdmull_s32(b, c)) +pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orn) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34538,21 +34364,20 @@ pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - vqdmulh_s16(a, vdup_n_s16(vgetq_lane_s16::(b))) +pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + let c = int64x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orn) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34561,21 +34386,20 @@ pub fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - vqdmulhq_s16(a, vdupq_n_s16(vgetq_lane_s16::(b))) +pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34584,21 +34408,19 @@ pub fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - vqdmulh_s32(a, vdup_n_s32(vgetq_lane_s32::(b))) +pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( 
not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34607,19 +34429,18 @@ pub fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vqdmulhq_s32(a, vdupq_n_s32(vgetq_lane_s32::(b))) +pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34629,19 +34450,18 @@ pub fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - let b: int16x4_t = vdup_n_s16(b); - vqdmulh_s16(a, b) +pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"] 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34651,19 +34471,18 @@ pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - let b: int16x8_t = vdupq_n_s16(b); - vqdmulhq_s16(a, b) +pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34673,19 +34492,18 @@ pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - let b: int32x2_t = vdup_n_s32(b); - vqdmulh_s32(a, b) +pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34695,19 +34513,18 @@ pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - let b: int32x4_t = vdupq_n_s32(b); - vqdmulhq_s32(a, b) +pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34717,26 +34534,18 @@ pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub 
fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v4i16" - )] - fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vqdmulh_s16(a, b) } +pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34746,26 +34555,18 @@ pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v8i16" - )] - fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vqdmulhq_s16(a, b) } +pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34775,26 +34576,18 @@ pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v2i32" - )] - fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vqdmulh_s32(a, b) } +pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34804,28 +34597,19 @@ pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v4i32" - )] - fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vqdmulhq_s32(a, b) } +pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull, N = 2) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34834,22 +34618,19 @@ pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - let b = vdup_lane_s16::(b); - vqdmull_s16(a, b) +pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t 
{ + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull, N = 1) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34858,20 +34639,18 @@ pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - let b = vdup_lane_s32::(b); - vqdmull_s32(a, b) +pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34881,18 +34660,18 @@ pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { - vqdmull_s16(a, vdup_n_s16(b)) +pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34902,18 +34681,18 @@ pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { - vqdmull_s32(a, vdup_n_s32(b)) +pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34923,26 +34702,18 @@ pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmull.v4i32" - )] - fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - } - unsafe { _vqdmull_s16(a, b) } +pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -34952,26 +34723,18 @@ pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmull.v2i64" - )] - fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - } - unsafe { _vqdmull_s32(a, b) } +pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -34981,26 +34744,27 @@ pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtn.v8i8" - )] - fn _vqmovn_s16(a: int16x8_t) -> int8x8_t; +pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { + let x: int16x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s8(a, b); } - unsafe { _vqmovn_s16(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s8(b), a); + }; + x } -#[doc = "Signed saturating extract narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35010,26 +34774,27 @@ pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtn.v4i16" - )] - fn _vqmovn_s32(a: int32x4_t) -> int16x4_t; +pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let x: int16x8_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s8(a, b); } - unsafe { _vqmovn_s32(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s8(b), a); + }; + x } -#[doc = "Signed saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vpadal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35039,26 +34804,27 @@ pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtn.v2i32" - )] - fn _vqmovn_s64(a: int64x2_t) -> int32x2_t; +pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + let x: int32x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s16(a, b); } - unsafe { _vqmovn_s64(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s16(b), a); + }; + x } -#[doc = "Unsigned saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35068,26 +34834,27 @@ pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqxtn.v8i8" - )] - fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t; +pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let x: int32x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s16(a, b); } - unsafe { _vqmovn_u16(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s16(b), a); + }; + x } -#[doc = "Unsigned saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35097,26 +34864,27 @@ pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqxtn.v4i16" - )] - fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t; +pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + let x: int64x1_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s32(a, b); } - unsafe { _vqmovn_u32(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { 
+ x = simd_add(vpaddl_s32(b), a); + }; + x } -#[doc = "Unsigned saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35126,26 +34894,27 @@ pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqxtn.v2i32" - )] - fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t; +pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let x: int64x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s32(a, b); } - unsafe { _vqmovn_u64(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s32(b), a); + }; + x } -#[doc = "Signed saturating extract unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtun) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35155,26 +34924,27 @@ pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtun.v8i8" - )] - fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t; +pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + let x: uint16x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u8(a, b); } - unsafe { _vqmovun_s16(a) } -} -#[doc = "Signed saturating extract unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"] + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u8(b), a); + }; + x +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtun) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35184,26 +34954,27 @@ pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtun.v4i16" - )] - fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t; +pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let x: uint16x8_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u8(a, b); } - unsafe { _vqmovun_s32(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u8(b), a); + }; + x } -#[doc = "Signed saturating extract unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtun) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35213,26 +34984,27 @@ pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtun.v2i32" - )] - fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t; +pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + 
let x: uint32x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u16(a, b); } - unsafe { _vqmovun_s64(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u16(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35242,26 +35014,27 @@ pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqneg_s8(a: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] - fn _vqneg_s8(a: int8x8_t) -> int8x8_t; +pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let x: uint32x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u16(a, b); } - unsafe { _vqneg_s8(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u16(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35271,26 +35044,27 @@ pub fn vqneg_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] - fn _vqnegq_s8(a: int8x16_t) -> int8x16_t; +pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + let x: uint64x1_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u32(a, b); } - unsafe { _vqnegq_s8(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u32(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(uadalp) )] 
#[cfg_attr( not(target_arch = "arm"), @@ -35300,84 +35074,95 @@ pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqneg_s16(a: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] - fn _vqneg_s16(a: int16x4_t) -> int16x4_t; +pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let x: uint64x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u32(a, b); } - unsafe { _vqneg_s16(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u32(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(faddp) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn 
vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v8i16" + link_name = "llvm.aarch64.neon.faddp.v4f16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] - fn _vqnegq_s16(a: int16x8_t) -> int16x8_t; + fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; } - unsafe { _vqnegq_s16(a) } + unsafe { _vpadd_f16(a, b) } } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(faddp) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqneg_s32(a: int32x2_t) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.sqneg.v2i32" + link_name = "llvm.aarch64.neon.faddp.v4f16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] - fn _vqneg_s32(a: int32x2_t) -> int32x2_t; + fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpadd_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vqneg_s32(a) } } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(faddp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35387,28 +35172,28 @@ pub fn vqneg_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t { +pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v4i32" + link_name = "llvm.aarch64.neon.faddp.v2f32" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] - fn _vqnegq_s32(a: int32x4_t) -> int32x4_t; + fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> 
float32x2_t; } - unsafe { _vqnegq_s32(a) } + unsafe { _vpadd_f32(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(faddp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35417,22 +35202,33 @@ pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - let b = vdup_lane_s16::(b); - vqrdmulh_s16(a, b) +pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v2f32" + )] + fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpadd_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35441,22 +35237,28 @@ pub fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let b = vdup_lane_s32::(b); - vqrdmulh_s32(a, b) +pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] + fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vpadd_s8(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35465,22 +35267,33 @@ pub fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - let b = vdup_laneq_s16::(b); - vqrdmulh_s16(a, b) +pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] + fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = _vpadd_s8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35489,22 +35302,28 @@ pub fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let b = vdup_laneq_s32::(b); - vqrdmulh_s32(a, b) +pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] + fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vpadd_s16(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35513,22 +35332,33 @@ pub fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
)] -pub fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - let b = vdupq_lane_s16::(b); - vqrdmulhq_s16(a, b) +pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] + fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = _vpadd_s16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35537,22 +35367,28 @@ pub fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let b = vdupq_lane_s32::(b); - vqrdmulhq_s32(a, b) +pub fn vpadd_s32(a: int32x2_t, b: 
int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] + fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vpadd_s32(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35561,22 +35397,32 @@ pub fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - let b = vdupq_laneq_s16::(b); - vqrdmulhq_s16(a, b) +pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] + fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: 
int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = _vpadd_s32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35585,20 +35431,18 @@ pub fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let b = vdupq_laneq_s32::(b); - vqrdmulhq_s32(a, b) +pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(addp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35608,18 +35452,18 @@ pub fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - vqrdmulh_s16(a, vdup_n_s16(b)) +pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(addp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35629,18 +35473,18 @@ pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - vqrdmulhq_s16(a, vdupq_n_s16(b)) +pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35650,18 +35494,26 @@ pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - vqrdmulh_s32(a, vdup_n_s32(b)) +pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")] + fn _vpaddl_s8(a: int8x8_t) -> int16x4_t; + } + unsafe { _vpaddl_s8(a) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35671,18 +35523,26 @@ pub fn vqrdmulh_n_s32(a: int32x2_t, b: 
i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - vqrdmulhq_s32(a, vdupq_n_s32(b)) +pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] + fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t; + } + unsafe { _vpaddlq_s8(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35692,26 +35552,26 @@ pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v4i16" + link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" )] - fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] + fn _vpaddl_s16(a: int16x4_t) -> int32x2_t; } - unsafe { _vqrdmulh_s16(a, b) } + unsafe { _vpaddl_s16(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35721,26 +35581,26 @@ pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v8i16" + link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" )] - fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] + fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t; } - unsafe { _vqrdmulhq_s16(a, b) } + unsafe { _vpaddlq_s16(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"] 
-#[inline] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"] +#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35750,26 +35610,26 @@ pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v2i32" + link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" )] - fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] + fn _vpaddl_s32(a: int32x2_t) -> int64x1_t; } - unsafe { _vqrdmulh_s32(a, b) } + unsafe { _vpaddl_s32(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr("vpaddl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35779,26 +35639,26 @@ pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v4i32" + link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" )] - fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] + fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t; } - unsafe { _vqrdmulhq_s32(a, b) } + unsafe { _vpaddlq_s32(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35808,26 +35668,26 @@ pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s8(a: int8x8_t, b: 
int8x8_t) -> int8x8_t { +pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v8i8" + link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" )] - fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] + fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t; } - unsafe { _vqrshl_s8(a, b) } + unsafe { _vpaddl_u8(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35837,26 +35697,26 @@ pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v16i8" + link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" )] - fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] + fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t; } - unsafe { _vqrshlq_s8(a, b) } + unsafe { _vpaddlq_u8(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35866,26 +35726,26 @@ pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v4i16" + link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" )] - fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] + fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t; } - unsafe { _vqrshl_s16(a, b) } + unsafe { _vpaddl_u16(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35895,26 +35755,26 @@ pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v8i16" + link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" )] - fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] + fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t; } - unsafe { _vqrshlq_s16(a, b) } + unsafe { _vpaddlq_u16(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35924,26 +35784,26 @@ pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v2i32" + link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" )] - fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] + fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t; } - unsafe { _vqrshl_s32(a, b) } + unsafe { _vpaddl_u32(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35953,26 +35813,27 @@ pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { unsafe extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v4i32" + link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" )] - fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] + fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t; } - unsafe { _vqrshlq_s32(a, b) } + unsafe { _vpaddlq_u32(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(fmaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35982,26 +35843,27 @@ pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { +pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v1i64" + link_name = "llvm.aarch64.neon.fmaxp.v2f32" )] - fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] + fn 
_vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - unsafe { _vqrshl_s64(a, b) } + unsafe { _vpmax_f32(a, b) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(fmaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36011,26 +35873,32 @@ pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v2i64" + link_name = "llvm.aarch64.neon.fmaxp.v2f32" )] - fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] + fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpmax_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshlq_s64(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36040,26 +35908,27 @@ pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { +pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v8i8" + link_name = "llvm.aarch64.neon.smaxp.v8i8" )] - fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] + fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } - unsafe { _vqrshl_u8(a, b) } + unsafe { _vpmax_s8(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36069,26 +35938,32 @@ pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { +pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v16i8" + link_name = "llvm.aarch64.neon.smaxp.v8i8" )] - fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] + fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = _vpmax_s8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vqrshlq_u8(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36098,26 +35973,27 @@ pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { +pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v4i16" + link_name = "llvm.aarch64.neon.smaxp.v4i16" )] - fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] + fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } - unsafe { _vqrshl_u16(a, b) } + unsafe { _vpmax_s16(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36127,26 +36003,32 @@ pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { +pub fn vpmax_s16(a: 
int16x4_t, b: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v8i16" + link_name = "llvm.aarch64.neon.smaxp.v4i16" )] - fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] + fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = _vpmax_s16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vqrshlq_u16(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36156,26 +36038,27 @@ pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { +pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] #[cfg_attr( any(target_arch = "aarch64", 
target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v2i32" + link_name = "llvm.aarch64.neon.smaxp.v2i32" )] - fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] + fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } - unsafe { _vqrshl_u32(a, b) } + unsafe { _vpmax_s32(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36185,26 +36068,32 @@ pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { +pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v4i32" + link_name = "llvm.aarch64.neon.smaxp.v2i32" )] - fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] + fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let 
b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = _vpmax_s32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshlq_u32(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(umaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36214,26 +36103,27 @@ pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { +pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v1i64" + link_name = "llvm.aarch64.neon.umaxp.v8i8" )] - fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] + fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; } - unsafe { _vqrshl_u64(a, b) } + unsafe { _vpmax_u8(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(umaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36243,343 +36133,258 @@ pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { +pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v2i64" + link_name = "llvm.aarch64.neon.umaxp.v8i8" )] - fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vqrshlq_u64(a, b) } -} -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] - fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; - } - unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } -} -#[doc = "Signed saturating rounded 
shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] - fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] + fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; } - unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } -} -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] - fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = _vpmax_u8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrn.v8i8" + link_name = "llvm.aarch64.neon.umaxp.v4i16" )] - fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] + fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; } - unsafe { _vqrshrn_n_s16(a, N) } + unsafe { _vpmax_u16(a, b) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = 
"arm"))] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrn.v4i16" + link_name = "llvm.aarch64.neon.umaxp.v4i16" )] - fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] + fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = _vpmax_u16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vqrshrn_n_s32(a, N) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn 
vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrn.v2i32" + link_name = "llvm.aarch64.neon.umaxp.v2i32" )] - fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] + fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; } - unsafe { _vqrshrn_n_s64(a, N) } + unsafe { _vpmax_u32(a, b) } } -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] - fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; - } - unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] -#[inline] 
-#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] - fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; - } - unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] - fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; - } - unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 
&& N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshrn.v8i8" + link_name = "llvm.aarch64.neon.umaxp.v2i32" )] - fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] + fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; } - unsafe { _vqrshrn_n_u16(a, N) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshrn.v4i16" - )] - fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = _vpmax_u32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshrn_n_u32(a, N) } } -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshrn.v2i32" + link_name = "llvm.aarch64.neon.fminp.v2f32" )] - fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; - } - unsafe { _vqrshrn_n_u64(a, N) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqrshiftnsu.v8i8")] - fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; - } - unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] - fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; - } - unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] - fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] + fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } + unsafe { _vpmin_f32(a, b) } } -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrun.v8i8" + link_name = "llvm.aarch64.neon.fminp.v2f32" )] - fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] + fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - unsafe { _vqrshrun_n_s16(a, N) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrun_n_s32(a: int32x4_t) -> 
uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrun.v4i16" - )] - fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpmin_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshrun_n_s32(a, N) } } -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrun.v2i32" + link_name = "llvm.aarch64.neon.sminp.v8i8" )] - fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vpmins.v8i8")] + fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } - unsafe { _vqrshrun_n_s64(a, N) } + unsafe { _vpmin_s8(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36588,21 +36393,33 @@ pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - vqshl_s8(a, vdup_n_s8(N as _)) +pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] + fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = _vpmin_s8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36611,21 +36428,28 @@ pub fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - vqshlq_s8(a, vdupq_n_s8(N as _)) +pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] + fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vpmin_s16(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36634,21 +36458,33 @@ pub fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); - vqshl_s16(a, vdup_n_s16(N as _)) +pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] + fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = _vpmin_s16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36657,21 +36493,28 @@ pub fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); - vqshlq_s16(a, vdupq_n_s16(N as _)) +pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] + fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vpmin_s32(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36680,21 +36523,33 @@ pub fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 5); - vqshl_s32(a, vdup_n_s32(N as _)) +pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] + fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = _vpmin_s32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36703,21 +36558,28 @@ pub fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 5); - vqshlq_s32(a, vdupq_n_s32(N as _)) +pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] + fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vpmin_u8(a, b) } } -#[doc = "Signed saturating shift left"] 
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36726,21 +36588,33 @@ pub fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { - static_assert_uimm_bits!(N, 6); - vqshl_s64(a, vdup_n_s64(N as _)) +pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] + fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = _vpmin_u8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36749,21 +36623,28 @@ pub fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 6); - vqshlq_s64(a, vdupq_n_s64(N as _)) +pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] + fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vpmin_u16(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(uqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36772,21 +36653,33 @@ pub fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - vqshl_u8(a, vdup_n_s8(N as _)) +pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] + fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = _vpmin_u16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36795,21 +36688,28 @@ pub fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - vqshlq_u8(a, vdupq_n_s8(N as _)) +pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] + fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vpmin_u32(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36818,21 +36718,32 @@ pub fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - vqshl_u16(a, vdup_n_s16(N as _)) +pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vpminu.v2i32")] + fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = _vpmin_u32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36841,21 +36752,27 @@ pub fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - vqshlq_u16(a, vdupq_n_s16(N as _)) +pub fn vqabs_s8(a: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] + fn _vqabs_s8(a: int8x8_t) -> int8x8_t; + } + unsafe { _vqabs_s8(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36864,21 +36781,27 @@ pub fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - vqshl_u32(a, vdup_n_s32(N as _)) +pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] + fn _vqabsq_s8(a: int8x16_t) -> int8x16_t; + } + unsafe { _vqabsq_s8(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( 
not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36887,21 +36810,27 @@ pub fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - vqshlq_u32(a, vdupq_n_s32(N as _)) +pub fn vqabs_s16(a: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] + fn _vqabs_s16(a: int16x4_t) -> int16x4_t; + } + unsafe { _vqabs_s16(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36910,21 +36839,27 @@ pub fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - vqshl_u64(a, vdup_n_s64(N as _)) +pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.sqabs.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")] + fn _vqabsq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vqabsq_s16(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36933,19 +36868,26 @@ pub fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - vqshlq_u64(a, vdupq_n_s64(N as _)) +pub fn vqabs_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] + fn _vqabs_s32(a: int32x2_t) -> int32x2_t; + } + unsafe { _vqabs_s32(a) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqabs) )] #[cfg_attr( not(target_arch = "arm"), @@ -36955,26 +36897,26 @@ pub fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v8i8" + link_name = "llvm.aarch64.neon.sqabs.v4i32" )] - fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] + fn _vqabsq_s32(a: int32x4_t) -> int32x4_t; } - unsafe { _vqshl_s8(a, b) } + unsafe { _vqabsq_s32(a) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -36984,26 +36926,18 @@ pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature 
= "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v16i8" - )] - fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vqshlq_s8(a, b) } +pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37013,26 +36947,18 @@ pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v4i16" - )] - fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vqshl_s16(a, b) } +pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37042,26 +36968,18 @@ pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v8i16" - )] - fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vqshlq_s16(a, b) } +pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( 
not(target_arch = "arm"), @@ -37071,26 +36989,18 @@ pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v2i32" - )] - fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vqshl_s32(a, b) } +pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37100,26 +37010,18 @@ pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v4i32" - )] - fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vqshlq_s32(a, b) } +pub fn vqadd_s32(a: int32x2_t, b: 
int32x2_t) -> int32x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37129,26 +37031,18 @@ pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v1i64" - )] - fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } - unsafe { _vqshl_s64(a, b) } +pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37158,26 +37052,18 @@ pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v2i64" - )] - fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } - unsafe { _vqshlq_s64(a, b) } +pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37187,26 +37073,18 @@ pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v8i8" - )] - fn 
_vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } - unsafe { _vqshl_u8(a, b) } +pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37216,26 +37094,18 @@ pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v16i8" - )] - fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } - unsafe { _vqshlq_u8(a, b) } +pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37245,26 +37115,18 @@ pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v4i16" - )] - fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } - unsafe { _vqshl_u16(a, b) } +pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37274,26 +37136,18 @@ pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] 
- #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v8i16" - )] - fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } - unsafe { _vqshlq_u16(a, b) } +pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37303,26 +37157,18 @@ pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v2i32" - )] - fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } - unsafe { _vqshl_u32(a, b) } +pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37332,26 +37178,18 @@ pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v4i32" - )] - fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } - unsafe { _vqshlq_u32(a, b) } +pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37361,26 +37199,18 @@ pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v1i64" - )] - fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vqshl_u64(a, b) } +pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37390,622 +37220,363 @@ pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v2i64" - )] - fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vqshlq_u64(a, b) } -} -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] - fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; - } - unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } -} -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] - fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; - } - unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } +pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] - fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; - } - unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] - fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; - } - unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } -} -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] - fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; - } - unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlal, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqaddq_s32(a, vqdmull_lane_s16::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn 
vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] - fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; - } - unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlal, N = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqaddq_s64(a, vqdmull_lane_s32::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")] - fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; - } - unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqaddq_s32(a, vqdmull_n_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] - fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; - } - unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqaddq_s64(a, vqdmull_n_s32(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v8i8" - )] - fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; - } - unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqaddq_s32(a, vqdmull_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = 
"arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v16i8" - )] - fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; - } - unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqaddq_s64(a, vqdmull_s32(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v4i16" - )] - fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; - } - unsafe { 
_vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlsl, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqsubq_s32(a, vqdmull_lane_s16::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v8i16" - )] - fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; - } - unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlsl, N = 1) 
+)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqsubq_s64(a, vqdmull_lane_s32::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v2i32" - )] - fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; - } - unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqsubq_s32(a, vqdmull_n_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v4i32" - )] - fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; - } - unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqsubq_s64(a, vqdmull_n_s32(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] -pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v1i64" - )] - fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; - } - unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqsubq_s32(a, vqdmull_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v2i64" - )] - fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; - } - unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqsubq_s64(a, vqdmull_s32(b, c)) } -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] - fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; - } - unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + 
static_assert_uimm_bits!(LANE, 3); + vqdmulh_s16(a, vdup_n_s16(vgetq_lane_s16::(b))) } -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] - fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; - } - unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + vqdmulhq_s16(a, vdupq_n_s16(vgetq_lane_s16::(b))) } -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] 
-#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] - fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; - } - unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } -} -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrn.v8i8" - )] - fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; - } - unsafe { _vqshrn_n_s16(a, N) } -} -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrn.v4i16" - )] - fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; - } - unsafe { _vqshrn_n_s32(a, N) } -} -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrn.v2i32" - )] - fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; - } - unsafe { _vqshrn_n_s64(a, N) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] - fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; - } - unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] - fn 
_vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; - } - unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] - fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; - } - unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshrn.v8i8" - )] - fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; - } - unsafe { _vqshrn_n_u16(a, N) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn 
vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshrn.v4i16" - )] - fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; - } - unsafe { _vqshrn_n_u32(a, N) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshrn.v2i32" - )] - fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; - } - unsafe { _vqshrn_n_u64(a, N) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] - fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; - } - unsafe { _vqshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] -#[inline] 
-#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] - fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; - } - unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] - fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; - } - unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrun.v8i8" - )] - fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; 
- } - unsafe { _vqshrun_n_s16(a, N) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrun.v4i16" - )] - fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; - } - unsafe { _vqshrun_n_s32(a, N) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrun.v2i32" - )] - fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; - } - unsafe { _vqshrun_n_s64(a, N) } -} -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38014,19 +37585,21 @@ pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + vqdmulh_s32(a, vdup_n_s32(vgetq_lane_s32::(b))) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38035,18 +37608,19 @@ pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + 
static_assert_uimm_bits!(LANE, 2); + vqdmulhq_s32(a, vdupq_n_s32(vgetq_lane_s32::(b))) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38056,18 +37630,19 @@ pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + let b: int16x4_t = vdup_n_s16(b); + vqdmulh_s16(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38077,18 +37652,19 @@ pub 
fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + let b: int16x8_t = vdupq_n_s16(b); + vqdmulhq_s16(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38098,18 +37674,19 @@ pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + let b: int32x2_t = vdup_n_s32(b); + vqdmulh_s32(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38119,18 +37696,19 @@ pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + let b: int32x4_t = vdupq_n_s32(b); + vqdmulhq_s32(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38140,18 +37718,26 @@ pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v4i16" + )] + fn _vqdmulh_s16(a: 
int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqdmulh_s16(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38161,18 +37747,26 @@ pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v8i16" + )] + fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqdmulhq_s16(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38182,18 +37776,26 @@ pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v2i32" + )] + fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vqdmulh_s32(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38203,19 +37805,28 @@ pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_s32(a: int32x4_t, b: 
int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v4i32" + )] + fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqdmulhq_s32(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"] +#[doc = "Vector saturating doubling long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38224,19 +37835,22 @@ pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + let b = vdup_lane_s16::(b); + vqdmull_s16(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"] +#[doc = "Vector saturating doubling long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38245,18 +37859,20 @@ pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + let b = vdup_lane_s32::(b); + vqdmull_s32(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"] +#[doc = "Vector saturating doubling long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -38266,18 +37882,18 @@ pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_n_s16(a: 
int16x4_t, b: i16) -> int32x4_t { + vqdmull_s16(a, vdup_n_s16(b)) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"] +#[doc = "Vector saturating doubling long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -38287,18 +37903,18 @@ pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vqdmull_s32(a, vdup_n_s32(b)) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"] +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -38308,43 +37924,26 @@ pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> 
uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_saturating_sub(a, b) } -} -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmull.v4i32" + )] + fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; + } + unsafe { _vqdmull_s16(a, b) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"] +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( - all( - test, - 
any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(raddhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -38354,23 +37953,26 @@ pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - let x = vraddhn_s16(b, c); - vcombine_s8(a, x) +pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmull.v2i64" + )] + fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; + } + unsafe { _vqdmull_s32(a, b) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"] +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(raddhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -38380,23 +37982,26 @@ pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let x = vraddhn_s32(b, c); - vcombine_s16(a, x) +pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v8i8" + )] + fn _vqmovn_s16(a: int16x8_t) -> int8x8_t; + } + unsafe { _vqmovn_s16(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"] +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(raddhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -38406,23 +38011,26 @@ pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let x = vraddhn_s64(b, c); - vcombine_s32(a, x) +pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v4i16" + )] + fn _vqmovn_s32(a: int32x4_t) 
-> int16x4_t; + } + unsafe { _vqmovn_s32(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"] +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(raddhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -38432,25 +38040,26 @@ pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - unsafe { - let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c))); - vcombine_u8(a, x) +pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v2i32" + )] + fn _vqmovn_s64(a: int64x2_t) -> int32x2_t; } + unsafe { _vqmovn_s64(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"] +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"] 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(raddhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -38460,25 +38069,26 @@ pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - unsafe { - let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c))); - vcombine_u16(a, x) +pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v8i8" + )] + fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t; } + unsafe { _vqmovn_u16(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"] +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(raddhn2) 
+ all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -38488,21 +38098,26 @@ pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - unsafe { - let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c))); - vcombine_u32(a, x) +pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v4i16" + )] + fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t; } + unsafe { _vqmovn_u32(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"] +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(uqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -38512,26 +38127,26 @@ pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { +pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vqmovnu.v2i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.raddhn.v8i8" + link_name = "llvm.aarch64.neon.uqxtn.v2i32" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")] - fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; + fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t; } - unsafe { _vraddhn_s16(a, b) } + unsafe { _vqmovn_u64(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"] +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqxtun) )] #[cfg_attr( not(target_arch = "arm"), @@ -38541,26 +38156,26 @@ pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { +pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.raddhn.v4i16" + link_name = "llvm.aarch64.neon.sqxtun.v8i8" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")] - fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t; } - unsafe { _vraddhn_s32(a, b) } + unsafe { 
_vqmovun_s16(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"] +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqxtun) )] #[cfg_attr( not(target_arch = "arm"), @@ -38570,27 +38185,26 @@ pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { +pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.raddhn.v2i32" + link_name = "llvm.aarch64.neon.sqxtun.v4i16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")] - fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; + fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t; } - unsafe { _vraddhn_s64(a, b) } + unsafe { _vqmovun_s32(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqxtun) )] #[cfg_attr( not(target_arch = "arm"), @@ -38600,19 +38214,26 @@ pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) } +pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v2i32" + )] + fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t; + } + unsafe { _vqmovun_s64(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -38622,24 +38243,26 @@ pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqneg_s8(a: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] + fn _vqneg_s8(a: int8x8_t) -> int8x8_t; } + unsafe { _vqneg_s8(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -38649,19 +38272,26 @@ pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) } +pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v16i8" + )] + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vqneg.v16i8")] + fn _vqnegq_s8(a: int8x16_t) -> int8x16_t; + } + unsafe { _vqnegq_s8(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -38671,24 +38301,26 @@ pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqneg_s16(a: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] + fn _vqneg_s16(a: int16x4_t) -> int16x4_t; } + unsafe { _vqneg_s16(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -38698,19 +38330,26 @@ pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) } +pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] + fn _vqnegq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vqnegq_s16(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -38720,84 
+38359,81 @@ pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); - let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqneg_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] + fn _vqneg_s32(a: int32x2_t) -> int32x2_t; } + unsafe { _vqneg_s32(a) } } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqneg) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t { +pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t { 
unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v4f16" + link_name = "llvm.aarch64.neon.sqneg.v4i32" )] - fn _vrecpe_f16(a: float16x4_t) -> float16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] + fn _vqnegq_s32(a: int32x4_t) -> int32x4_t; } - unsafe { _vrecpe_f16(a) } + unsafe { _vqnegq_s32(a) } } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqrdmulh, LANE = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v8f16" - )] - fn _vrecpeq_f16(a: float16x8_t) -> 
float16x8_t; - } - unsafe { _vrecpeq_f16(a) } +pub fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdup_lane_s16::(b); + vqrdmulh_s16(a, b) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38806,27 +38442,22 @@ pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v2f32" - )] - fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrecpe_f32(a) } +pub fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let b = vdup_lane_s32::(b); + vqrdmulh_s32(a, b) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38835,27 +38466,22 @@ pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v4f32" - )] - fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrecpeq_f32(a) } +pub fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + let b = vdup_laneq_s16::(b); + vqrdmulh_s16(a, b) } -#[doc = "Unsigned reciprocal estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(urecpe) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38864,27 +38490,22 @@ pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urecpe.v2i32" - )] - fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t; - } - unsafe { _vrecpe_u32(a) } +pub fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdup_laneq_s32::(b); + vqrdmulh_s32(a, b) } -#[doc = "Unsigned reciprocal estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urecpe) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38893,87 +38514,70 @@ pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { - unsafe extern 
"unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urecpe.v4i32" - )] - fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t; - } - unsafe { _vrecpeq_u32(a) } +pub fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdupq_lane_s16::(b); + vqrdmulhq_s16(a, b) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh, LANE = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v4f16" - )] - fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> 
float16x4_t; - } - unsafe { _vrecps_f16(a, b) } +pub fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let b = vdupq_lane_s32::(b); + vqrdmulhq_s32(a, b) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh, LANE = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v8f16" - )] - fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; - } - unsafe { _vrecpsq_f16(a, b) } +pub fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + let b = vdupq_laneq_s16::(b); + vqrdmulhq_s16(a, b) } -#[doc = "Floating-point reciprocal 
step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -38982,26 +38586,20 @@ pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v2f32" - )] - fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vrecps_f32(a, b) } +pub fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdupq_laneq_s32::(b); + vqrdmulhq_s32(a, b) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -39011,2360 +38609,3052 @@ pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v4f32" - )] - fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - unsafe { _vrecpsq_f32(a, b) } +pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + vqrdmulh_s16(a, vdup_n_s16(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { - unsafe { transmute(a) } +pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + vqrdmulhq_s16(a, vdupq_n_s16(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + vqrdmulh_s32(a, vdup_n_s32(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + vqrdmulhq_s32(a, vdupq_n_s32(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v4i16" + )] + fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } + unsafe { _vqrdmulh_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v8i16" + )] + fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqrdmulhq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v2i32" + )] + fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } + unsafe { _vqrdmulh_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v4i32" + )] + fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqrdmulhq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v8i8" + )] + fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } + unsafe { _vqrshl_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] 
#[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v16i8" + )] + fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqrshlq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i16" + )] + fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } + unsafe { _vqrshl_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v8i16" + )] + fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqrshlq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i32" + )] + fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } + unsafe { _vqrshl_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i32" + )] + fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqrshlq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u16_f16(a: float16x4_t) -> 
uint16x4_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v1i64" + )] + fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } + unsafe { _vqrshl_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i64" + )] + fn _vqrshlq_s64(a: 
int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqrshlq_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i8" + )] + fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; } + unsafe { _vqrshl_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v16i8" + )] + fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vqrshlq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + 
assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i16" + )] + fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; } + unsafe { _vqrshl_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn 
vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i16" + )] + fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vqrshlq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v2i32" + )] + fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) 
-> uint32x2_t; } + unsafe { _vqrshl_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i32" + )] + fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vqrshlq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v1i64" + )] + fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; } + unsafe { _vqrshl_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( 
not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v2i64" + )] + fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vqrshlq_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] + fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] 
+#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] + fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] + fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v8i8" + )] + fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vqrshrn_n_s16(a, N) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v4i16" + )] + fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vqrshrn_n_s32(a, N) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v2i32" + )] + fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vqrshrn_n_s64(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqrshiftnu.v8i8")] + fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } + unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] + fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + } + unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] + fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + } + unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, 
N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v8i8" + )] + fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqrshrn_n_u16(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v4i16" + )] + fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqrshrn_n_u32(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v2i32" + )] + fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqrshrn_n_u64(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned 
narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] + fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } + unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] + fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } + unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { 
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] + fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } + unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v8i8" + )] + fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqrshrun_n_s16(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v4i16" + )] + fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqrshrun_n_s32(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] 
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v2i32" + )] + fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqrshrun_n_s64(a, N) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + vqshl_s8(a, vdup_n_s8(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + vqshlq_s8(a, vdupq_n_s8(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +pub fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + vqshl_s16(a, vdup_n_s16(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + vqshlq_s16(a, vdupq_n_s16(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "Signed 
saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 5); + vqshl_s32(a, vdup_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] 
+#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 5); + vqshlq_s32(a, vdupq_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_uimm_bits!(N, 6); + vqshl_s64(a, vdup_n_s64(N as _)) } -#[doc = "Vector reinterpret cast 
operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 6); + vqshlq_s64(a, vdupq_n_s64(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] 
+#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + vqshl_u8(a, vdup_n_s8(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + vqshlq_u8(a, vdupq_n_s8(N as _)) } -#[doc = "Vector reinterpret cast 
operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + vqshl_u16(a, vdup_n_s16(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + vqshlq_u16(a, vdupq_n_s16(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = 
transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + vqshl_u32(a, vdup_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + vqshlq_u32(a, vdupq_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + vqshl_u64(a, vdup_n_s64(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn 
vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + vqshlq_u64(a, vdupq_n_s64(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v8i8" + )] + fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } + unsafe { _vqshl_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = 
"Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v16i8" + )] + fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqshlq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v4i16" + )] + fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } + unsafe { _vqshl_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v8i16" + )] + fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqshlq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v2i32" + )] + fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } + unsafe { _vqshl_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v4i32" + )] + fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqshlq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"] #[inline] 
-#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v1i64" + )] + fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } + unsafe { _vqshl_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) 
)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v2i64" + )] + fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqshlq_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, 
ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v8i8" + )] + fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; } + unsafe { _vqshl_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v16i8" + )] + fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vqshlq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] -#[inline] -#[cfg(target_endian = "big")] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"] +#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v4i16" + )] + fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; } + unsafe { _vqshl_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v8i16" + )] + fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vqshlq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v2i32" + )] + fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; } + unsafe { _vqshl_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v4i32" + )] + fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vqshlq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v1i64" + )] + fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; } + unsafe { _vqshl_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v2i64" + )] + fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshlq_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] + fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; } + unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] 
-#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { - unsafe { transmute(a) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] + fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } + unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800")] +pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] + fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; } + unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { - unsafe { transmute(a) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] + fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } + unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] + fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] 
-#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { - unsafe { transmute(a) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] + fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } + unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue 
= "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { - unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")] + fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; } + unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { - unsafe { transmute(a) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn 
vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] + fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v8i8" + )] + fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; } + unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } } -#[doc = "Vector 
reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v16i8" + )] + fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } + unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v4i16" + )] + fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } + unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v8i16" + )] + fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } + unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v2i32" + )] + fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v4i32" + )] + fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } + unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v1i64" + )] + fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } + unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = 
"arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v2i64" + )] + fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] + fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] + fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed 
saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] + fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v8i8" + )] + fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vqshrn_n_s16(a, N) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v4i16" + )] + fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vqshrn_n_s32(a, N) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v2i32" + )] + fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vqshrn_n_s64(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] + fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } + unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800")] +pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] + fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + } + unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] + fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + } + unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v8i8" + )] + fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqshrn_n_u16(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v4i16" + )] + fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqshrn_n_u32(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v2i32" + )] + fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqshrn_n_u64(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] + fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } + unsafe { _vqshrun_n_s16(a, const 
{ int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] + fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } + unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] + fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } + unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 
&& N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v8i8" + )] + fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqshrun_n_s16(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v4i16" + )] + fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqshrun_n_s32(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v2i32" + )] + fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqshrun_n_s64(a, N) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_saturating_sub(a, 
b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = 
"neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"] 
#[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { - unsafe { - let ret_val: 
float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] 
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let x = vraddhn_s16(b, c); + vcombine_s8(a, x) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let x = vraddhn_s32(b, c); + vcombine_s16(a, x) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { - unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let x = vraddhn_s64(b, c); + vcombine_s32(a, x) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + unsafe { + let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c))); + vcombine_u8(a, x) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { +pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c))); + vcombine_u16(a, x) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + unsafe { + let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c))); + vcombine_u32(a, x) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn 
vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")] + fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; } + unsafe { _vraddhn_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { - unsafe { transmute(a) } +pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.raddhn.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")] + fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + } + unsafe { _vraddhn_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { - unsafe { - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")] + fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; } + unsafe { _vraddhn_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast 
operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -41374,23 +41664,27 @@ pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) +pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f16" + )] + fn _vrecpe_f16(a: float16x4_t) -> float16x4_t; } + unsafe { _vrecpe_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -41400,96 +41694,143 @@ pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v8f16" + )] + fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrecpeq_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v2f32" + )] + fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; } + unsafe { _vrecpe_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vrecpe.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f32" + )] + fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrecpeq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urecpe) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { - unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v2i32" + )] + fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t; } + unsafe { _vrecpe_u32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v4i32" + )] + fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t; + } + unsafe { _vrecpeq_u32(a) } +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"] +#[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -41499,20 +41840,27 @@ pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn 
vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f16" + )] + fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vrecps_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -41522,23 +41870,26 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { - unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v8f16" + )] + fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; } + unsafe { _vrecpsq_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] #[cfg_attr( not(target_arch = "arm"), @@ -41548,19 +41899,26 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v2f32" + )] + fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vrecps_f32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] #[cfg_attr( not(target_arch = "arm"), @@ -41570,16 +41928,20 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { - unsafe { - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f32" + )] + fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; } + unsafe { _vrecpsq_f32(a, b) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41589,19 +41951,19 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { 
+#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41611,23 +41973,19 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41637,19 +41995,19 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + 
stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41659,23 +42017,19 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(nop))] @@ -41685,19 +42039,19 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41707,23 +42061,19 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41733,19 +42083,19 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41755,22 +42105,19 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] 
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41780,19 +42127,19 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41802,23 +42149,19 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x8_t = transmute(a); - 
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41828,19 +42171,19 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41850,23 +42193,19 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41876,19 +42215,19 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41898,23 +42237,19 @@ pub fn 
vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41924,19 +42259,19 @@ pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] #[inline] 
-#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41946,22 +42281,19 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41971,19 +42303,19 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -41993,23 +42325,19 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42019,19 +42347,19 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42041,23 +42369,19 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42067,19 +42391,19 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - 
stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42089,22 +42413,19 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] @@ -42114,19 +42435,19 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42136,27 +42457,19 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42166,19 +42479,19 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42188,23 +42501,19 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u8(a: 
uint8x16_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42214,19 +42523,19 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42236,23 +42545,19 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { - 
unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42262,19 +42567,19 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42284,23 +42589,19 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = 
"1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42310,19 +42611,19 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42332,27 +42633,19 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42362,19 +42655,19 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42384,23 +42677,19 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42410,19 +42699,19 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42432,23 +42721,19 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42458,19 +42743,19 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { )] #[cfg_attr( 
not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42480,23 +42765,19 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42506,19 +42787,19 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42528,27 +42809,19 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42558,19 +42831,19 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42580,23 +42853,19 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - 
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42606,19 +42875,19 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42628,23 +42897,19 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42660,13 +42925,12 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { +pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42682,17 +42946,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { - unsafe { - let a: int8x8_t = 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42708,13 +42967,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { +pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42730,17 +42988,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + 
unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42756,13 +43009,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { +pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42778,16 +43030,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42803,13 +43051,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { +pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42825,17 +43072,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42851,13 +43093,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { +pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42873,17 +43114,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42899,13 +43135,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> 
uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { +pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42921,17 +43156,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42947,13 +43177,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { +pub fn vreinterpret_f32_s32(a: 
int32x2_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42969,16 +43198,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42994,13 +43219,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { +pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43016,17 +43240,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43042,13 +43261,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { +pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43064,17 +43282,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43090,13 +43303,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { +pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43112,18 +43324,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> 
float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43139,13 +43345,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { +pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43161,18 +43366,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t 
{ - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43188,13 +43387,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { +pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43210,18 +43408,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = 
transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43237,13 +43429,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { +pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43259,18 +43450,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = 
"Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43286,13 +43471,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { +pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43308,22 +43492,12 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43339,13 +43513,12 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { +pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43361,18 +43534,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43388,13 +43555,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { +pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43410,18 +43576,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] #[inline] -#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43437,13 +43597,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { +pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43459,18 +43618,12 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43486,13 
+43639,12 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { +pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43508,22 +43660,12 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43539,13 +43681,12 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { +pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43561,18 +43702,12 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43588,13 +43723,12 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { +pub fn 
vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43610,19 +43744,14 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -43630,21 +43759,21 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -43652,25 +43781,21 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -43678,21 +43803,21 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -43700,25 +43825,21 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn 
vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -43726,21 +43847,21 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -43748,22 +43869,19 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { )] #[cfg_attr( 
not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43779,13 +43897,12 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { +pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43801,17 +43918,12 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
)] -pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43827,13 +43939,12 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { +pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43849,17 +43960,12 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - 
simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43875,13 +43981,12 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { +pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43897,17 +44002,12 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43923,13 +44023,12 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { +pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43945,16 +44044,12 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] #[inline] -#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43970,13 +44065,12 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { +pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43992,17 +44086,12 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44018,13 +44107,12 @@ pub fn 
vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { +pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44040,17 +44128,12 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44066,13 +44149,12 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s16(a: int16x8_t) -> 
float32x4_t { +pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44088,17 +44170,12 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44114,13 +44191,12 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { +pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44136,21 +44212,12 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44166,13 +44233,12 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { +pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44188,17 +44254,12 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44214,13 +44275,12 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { +pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44236,17 +44296,12 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44262,13 +44317,12 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { +pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44284,21 +44338,12 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> 
uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44314,13 +44359,12 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { +pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44336,17 +44380,12 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s16(a: 
int16x8_t) -> uint16x8_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44362,13 +44401,12 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { +pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44384,17 +44422,12 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, 
ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44410,13 +44443,12 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { +pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44432,17 +44464,12 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44458,13 +44485,12 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { +pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44480,21 +44506,12 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44510,13 +44527,12 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { +pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44532,17 +44548,12 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable 
= "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44558,13 +44569,12 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { +pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44580,17 +44590,12 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44606,13 +44611,12 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> 
float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { +pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44628,17 +44632,12 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44654,13 +44653,12 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { +pub fn vreinterpretq_p64_s8(a: int8x16_t) -> 
poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44676,17 +44674,12 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44702,13 +44695,12 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { +pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44724,16 +44716,12 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44749,13 +44737,12 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { +pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] @@ -44771,17 +44758,12 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44797,13 +44779,12 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { +pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44819,17 +44800,12 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44845,13 +44821,12 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { +pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44867,17 +44842,12 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } 
+pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44893,13 +44863,12 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { +pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44915,16 +44884,12 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44940,13 +44905,12 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { +pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44962,17 +44926,12 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44988,13 +44947,12 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { +pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45010,17 +44968,12 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45036,13 +44989,12 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> 
poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { +pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45058,17 +45010,12 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45084,13 +45031,12 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { +pub fn 
vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45106,21 +45052,12 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45136,13 +45073,12 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { +pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45158,17 +45094,12 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45184,13 +45115,12 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { +pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45206,17 +45136,12 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45232,13 +45157,12 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { +pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45254,21 +45178,12 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45284,13 +45199,12 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { +pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45306,17 +45220,12 @@ pub fn 
vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45332,13 +45241,12 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { +pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45354,17 +45262,12 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s32(a: 
int32x4_t) -> uint32x4_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45380,13 +45283,12 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { +pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45402,17 +45304,12 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn 
vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45428,13 +45325,12 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { +pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45450,21 +45346,12 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45480,13 +45367,12 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { +pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45502,17 +45388,12 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45528,13 +45409,12 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { +pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45550,16 +45430,12 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { - unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45575,13 +45451,12 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { +pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45597,16 +45472,12 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { - unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45622,13 +45493,12 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { +pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45645,15 +45515,11 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { - unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45673,9 +45539,8 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45691,16 +45556,12 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45716,13 +45577,12 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { +pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45738,16 +45598,12 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { - unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = 
"Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45763,13 +45619,12 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { +pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45785,16 +45640,12 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { - unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45810,13 +45661,12 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { +pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45832,14 +45682,11 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { - unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -45856,13 +45703,12 @@ pub fn 
vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { +pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45878,13 +45724,12 @@ pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { +pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45900,16 +45745,12 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { - unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_s64(a: int64x2_t) -> 
int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45925,13 +45766,12 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { +pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45947,16 +45787,12 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45972,13 +45808,12 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { +pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45994,17 +45829,12 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46020,13 +45850,12 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { +pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46042,21 +45871,12 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46072,13 +45892,12 @@ pub 
fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { +pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46094,17 +45913,12 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46120,13 +45934,12 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s64(a: int64x2_t) -> 
int32x4_t { +pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46142,17 +45955,12 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46168,13 +45976,12 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { +pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46190,21 +45997,12 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46220,13 +46018,12 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { +pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46242,17 +46039,12 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46268,13 +46060,12 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { +pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46290,17 +46081,12 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46316,13 +46102,12 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { +pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46338,17 +46123,12 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> 
uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46364,13 +46144,12 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { +pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46386,21 +46165,12 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { - unsafe { - let a: int64x2_t = 
simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46416,13 +46186,12 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { +pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46438,17 +46207,12 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn 
vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46464,13 +46228,12 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { +pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46486,17 +46249,12 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46512,13 +46270,12 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { +pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46534,17 +46291,12 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46560,13 +46312,12 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { +pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46582,17 +46333,12 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46608,13 +46354,12 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { +pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46630,17 +46375,12 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46656,13 +46396,12 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t 
{ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { +pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46678,16 +46417,12 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46703,13 +46438,12 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { +pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast 
operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46725,17 +46459,12 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46751,13 +46480,12 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { +pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46773,17 +46501,12 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46799,13 +46522,12 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { +pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46821,16 +46543,12 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46846,13 +46564,12 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { +pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46868,17 +46585,12 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46894,13 +46606,12 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { +pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46916,17 +46627,12 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 
0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46942,13 +46648,12 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { +pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46964,18 +46669,12 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s8_u32(a: uint32x2_t) -> 
int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46991,13 +46690,12 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { +pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47013,22 +46711,12 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47044,13 +46732,12 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { +pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47066,18 +46753,12 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47093,13 +46774,12 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { +pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47115,18 +46795,12 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] #[inline] -#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47142,13 +46816,12 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { +pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47164,18 +46837,12 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] @@ -47191,13 +46858,12 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { +pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47213,18 +46879,12 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47240,13 +46900,12 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature 
= "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { +pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47262,18 +46921,12 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47289,13 +46942,12 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { +pub fn vreinterpretq_u8_u32(a: 
uint32x4_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47311,18 +46963,12 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47338,13 +46984,12 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { +pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47360,22 +47005,12 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47391,13 +47026,12 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { +pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47413,18 +47047,12 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47440,13 +47068,12 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { +pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47462,17 +47089,12 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47488,13 +47110,12 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { +pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47510,17 +47131,12 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47536,13 +47152,12 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { +pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47558,17 +47173,12 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47584,13 +47194,12 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { +pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47606,17 +47215,12 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, 
a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47632,13 +47236,12 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { +pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47654,16 +47257,12 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47679,13 +47278,12 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { +pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47701,17 +47299,12 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47727,13 +47320,12 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { +pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47749,17 +47341,12 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47775,13 +47362,12 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { +pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47797,16 +47383,12 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47822,13 +47404,12 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { +pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47844,17 +47425,12 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47870,13 +47446,12 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { +pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { unsafe { 
transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47892,17 +47467,12 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47918,13 +47488,12 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { +pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47940,17 +47509,12 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47966,13 +47530,12 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { +pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47988,21 +47551,12 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48018,13 +47572,12 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { +pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ 
-48040,17 +47593,12 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48066,13 +47614,12 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { +pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48088,17 +47635,12 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48114,13 +47656,12 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { +pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48136,17 +47677,12 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - 
simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48162,13 +47698,12 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { +pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48184,21 +47719,12 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { + unsafe { transmute(a) } } 
#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48214,13 +47740,12 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { +pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48236,17 +47761,12 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48262,13 +47782,12 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { +pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48284,17 +47803,12 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable 
= "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48310,13 +47824,12 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { +pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48332,21 +47845,12 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] @@ -48362,13 +47866,12 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { +pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48384,17 +47887,12 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48410,13 +47908,12 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { +pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48432,17 +47929,12 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48458,13 +47950,12 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { +pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = 
"Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48480,17 +47971,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48506,13 +47992,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { +pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48528,17 +48013,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48554,13 +48034,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { +pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48576,17 +48055,12 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48602,13 +48076,12 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { +pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48624,16 +48097,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48649,13 +48118,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { +pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48671,17 +48139,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 
2, 1, 0]) - } +pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48697,13 +48160,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { +pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48719,17 +48181,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48745,13 +48202,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { +pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48767,16 +48223,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] #[inline] -#[cfg(target_endian = 
"little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48792,13 +48244,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { +pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48814,17 +48265,12 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48840,13 +48286,12 @@ 
pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { +pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48862,17 +48307,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48888,13 +48328,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> 
float32x4_t { +pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48910,17 +48349,12 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48936,13 +48370,12 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { +pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48958,21 +48391,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48988,13 +48412,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { +pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49010,17 +48433,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49036,13 +48454,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { +pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49058,17 +48475,12 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49084,13 +48496,12 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { +pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49106,17 +48517,12 @@ pub fn 
vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49132,13 +48538,12 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { +pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49154,21 +48559,12 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> 
uint8x16_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49184,15 +48580,14 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { +pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49206,19 +48601,14 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
)] -pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49232,15 +48622,14 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { +pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), @@ -49254,19 +48643,14 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49280,15 +48664,14 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { +pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49302,23 +48685,14 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49332,15 +48706,14 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { +pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49354,19 +48727,14 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49380,15 +48748,14 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { +pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { unsafe { transmute(a) } 
} #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49402,18 +48769,14 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { - unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49427,15 +48790,14 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u64(a: 
uint64x1_t) -> int8x8_t { +pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49449,18 +48811,14 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { - unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -49474,19 +48832,19 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { target_arch 
= "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { +pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { unsafe { transmute(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"] #[inline] -#[cfg(target_endian = "big")] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49496,22 +48854,19 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { - unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"] #[inline] -#[cfg(target_endian = "little")] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49521,19 +48876,23 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"] #[inline] -#[cfg(target_endian = "big")] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49543,21 +48902,19 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrev16_s8(a: int8x8_t) -> 
int8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49567,19 +48924,23 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49589,19 +48950,19 @@ pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49611,22 +48972,23 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { +pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { unsafe { - let ret_val: uint8x8_t = transmute(a); + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] 
+#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49636,19 +48998,19 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49658,22 +49020,29 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u64(a: 
uint64x1_t) -> uint16x4_t { +pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49683,19 +49052,19 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49705,22 +49074,29 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { +pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = + simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = 
"arm"), @@ -49730,19 +49106,19 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -49752,22 +49128,29 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { +pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49777,19 +49160,19 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49799,22 +49182,23 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { +pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { unsafe { - let ret_val: poly16x4_t = transmute(a); + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49824,19 +49208,19 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49846,23 +49230,23 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { +pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49872,19 +49256,19 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { - unsafe 
{ transmute(a) } +pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49894,27 +49278,23 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { +pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49924,19 +49304,19 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49946,23 +49326,23 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { +pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); 
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49972,19 +49352,19 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -49994,23 
+49374,23 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { +pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50020,19 +49400,19 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50042,23 +49422,23 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { +pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50068,19 +49448,19 @@ pub fn 
vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50090,27 +49470,23 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { +pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] +#[doc = 
"Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50120,19 +49496,19 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50142,23 +49518,29 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u64(a: 
uint64x2_t) -> uint16x8_t { +pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50168,19 +49550,19 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50190,23 +49572,23 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { +pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50216,19 +49598,19 @@ pub fn 
vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50238,10 +49620,12 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { +pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]); simd_shuffle!( ret_val, ret_val, @@ -50249,16 +49633,16 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50268,19 +49652,19 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50290,23 +49674,23 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { +pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50316,19 +49700,19 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { - unsafe { transmute(a) } +pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"] 
#[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50338,23 +49722,29 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { +pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50364,19 +49754,19 @@ 
pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50386,23 +49776,23 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { +pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [1, 0]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50412,19 +49802,19 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50434,23 +49824,23 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { +pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 
6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50460,19 +49850,19 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50482,23 +49872,23 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { +pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { unsafe { let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let ret_val: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50508,19 +49898,19 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50530,22 +49920,23 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { +pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] 
#[cfg_attr( not(target_arch = "arm"), @@ -50555,19 +49946,19 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50577,23 +49968,23 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { +pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [1, 0]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50603,19 +49994,19 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50625,23 +50016,23 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { +pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { unsafe { - let a: poly8x8_t = 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50651,19 +50042,19 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50673,23 +50064,23 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { +pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50699,19 +50090,19 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } } 
-#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50721,22 +50112,23 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { +pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50746,19 +50138,19 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50768,23 +50160,23 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { +pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50794,19 +50186,19 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50816,24 +50208,23 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { +pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50843,19 +50234,19 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"] #[inline] 
#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50865,28 +50256,23 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { +pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50896,19 +50282,19 @@ pub fn 
vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50918,24 +50304,29 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { +pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { unsafe { let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let ret_val: poly8x16_t = + simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50945,19 +50336,19 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50967,24 +50358,23 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { +pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -50994,19 +50384,19 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51016,24 +50406,23 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { +pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51043,19 +50432,19 @@ pub fn 
vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51065,11 +50454,12 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { +pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { unsafe { - let a: poly8x16_t = + let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); + let ret_val: int8x16_t = + simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]); simd_shuffle!( ret_val, ret_val, @@ -51077,16 +50467,16 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] 
+#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51096,19 +50486,19 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51118,24 +50508,23 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { 
+pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51145,19 +50534,19 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51167,24 +50556,23 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { +pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51194,19 +50582,19 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn 
vrev64q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51216,121 +50604,128 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { +pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { unsafe { - let a: poly8x16_t = + let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let ret_val: uint8x16_t = + simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] #[inline] #[cfg(target_endian = "little")] 
-#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrev64_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { +#[cfg(not(target_arch = 
"arm64ec"))] +pub fn vrev64_f16(a: float16x4_t) -> float16x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + 
assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51340,19 +50735,26 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] + fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vrhadd_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51362,23 +50764,26 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] + fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } + unsafe { _vrhaddq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51388,19 +50793,26 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] + fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vrhadd_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"] #[inline] 
-#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51410,23 +50822,26 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] + fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } + unsafe { _vrhaddq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51436,19 +50851,26 @@ 
pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] + fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vrhadd_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51458,23 +50880,26 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> 
int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] + fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vrhaddq_s32(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51484,19 +50909,26 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] + fn _vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vrhadd_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51506,22 +50938,26 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] + fn _vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; } + unsafe { _vrhaddq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51531,19 +50967,26 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] + fn _vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vrhadd_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51553,23 +50996,26 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] + fn _vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; } + 
unsafe { _vrhaddq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51579,19 +51025,26 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] + fn _vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vrhadd_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vrhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -51601,71 +51054,84 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] + fn _vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } + unsafe { _vrhaddq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = 
"stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrndn_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v4f16" + )] + fn _vrndn_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrndn_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t { 
+ unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v8f16" + )] + fn _vrndnq_f16(a: float16x8_t) -> float16x8_t; } + unsafe { _vrndnq_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] #[cfg_attr( not(target_arch = "arm"), @@ -51675,19 +51141,25 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vrndn_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v2f32" + )] + fn _vrndn_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrndn_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] #[cfg_attr( not(target_arch = "arm"), @@ -51697,22 +51169,25 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v4f32" + )] + fn _vrndnq_f32(a: float32x4_t) -> float32x4_t; } + unsafe { _vrndnq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51722,19 +51197,26 
@@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v8i8" + )] + fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vrshl_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51744,23 +51226,26 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v16i8" + )] + fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } + unsafe { _vrshlq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51770,19 +51255,26 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v4i16" + )] + fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vrshl_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51792,23 +51284,26 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v8i16" + )] + fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } + unsafe { _vrshlq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51818,19 +51313,26 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v2i32" + )] + fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vrshl_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51840,27 +51342,26 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.srshl.v4i32" + )] + fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } + unsafe { _vrshlq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51870,19 +51371,26 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v1i64" + )] + fn _vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vrshl_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51892,23 +51400,26 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v2i64" + )] + fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; } + unsafe { _vrshlq_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51918,19 +51429,26 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i8" + )] + fn _vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vrshl_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51940,23 +51458,26 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v16i8" + )] + fn 
_vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; } + unsafe { _vrshlq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51966,19 +51487,26 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i16" + )] + fn _vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } + unsafe { _vrshl_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -51988,23 +51516,26 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i16" + )] + fn _vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; } + unsafe { _vrshlq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52014,19 +51545,26 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i32" + )] + fn _vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } + unsafe { _vrshl_u32(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52036,27 +51574,26 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i32" + )] + fn 
_vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; } + unsafe { _vrshlq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52066,19 +51603,26 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v1i64" + )] + fn _vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vrshl_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52088,24 +51632,28 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i64" + )] + fn _vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; } + unsafe { _vrshlq_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52114,20 +51662,21 @@ pub fn 
vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + vrshl_s8(a, vdup_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52136,24 +51685,21 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + vrshlq_s8(a, vdupq_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52162,20 +51708,21 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + vrshl_s16(a, vdup_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52184,24 +51731,21 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + vrshlq_s16(a, vdupq_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52210,20 +51754,21 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + vrshl_s32(a, vdup_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52232,28 +51777,21 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + vrshlq_s32(a, vdupq_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch 
= "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52262,20 +51800,21 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { - unsafe { transmute(a) } +pub fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + vrshl_s64(a, vdup_n_s64(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52284,27 +51823,21 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { - unsafe { - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vrshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 
&& N <= 64); + vrshlq_s64(a, vdupq_n_s64(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52313,20 +51846,21 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + vrshl_u8(a, vdup_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52335,23 +51869,21 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { - unsafe { - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vrshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + vrshlq_u8(a, vdupq_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52360,20 +51892,21 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vreinterpretq_s32_p128(a: p128) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + vrshl_u16(a, vdup_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52382,23 +51915,21 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { - unsafe { - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vrshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + vrshlq_u16(a, vdupq_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"] #[inline] -#[cfg(target_endian = 
"little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52407,20 +51938,21 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + vrshl_u32(a, vdup_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52429,23 +51961,21 @@ pub fn 
vreinterpretq_s64_p128(a: p128) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { - unsafe { - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + vrshlq_u32(a, vdupq_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52454,20 +51984,21 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + vrshl_u64(a, vdup_n_s64(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52476,27 +52007,126 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { - unsafe { - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + vrshlq_u64(a, vdupq_n_s64(-N as _)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")] + fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; } + unsafe { _vrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } } -#[doc = "Vector 
reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")] + fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { _vrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")] + fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v8i8" + )] + fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vrshrn_n_s16(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v4i16" + )] + fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vrshrn_n_s32(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v2i32" + )] + fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vrshrn_n_s64(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rshrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52505,20 +52135,21 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +pub fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vrshrn_n_s16::(transmute(a))) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rshrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52527,23 +52158,21 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { - unsafe { - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vrshrn_n_s32::(transmute(a))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rshrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52552,66 +52181,79 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { transmute(vrshrn_n_s64::(transmute(a))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[doc = "Reciprocal square-root estimate."] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { - unsafe { - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f16" + )] + fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t; } + unsafe { _vrsqrte_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v8f16" + )] + fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrsqrteq_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -52621,22 +52263,26 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vreinterpretq_u64_p128(a: p128) -> uint64x2_t { - unsafe { - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v2f32" + )] + fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t; } + unsafe { _vrsqrte_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -52646,19 +52292,26 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f32" + )] + fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { 
_vrsqrteq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[doc = "Unsigned reciprocal square root estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -52668,26 +52321,26 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { - unsafe { - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ursqrte.v2i32" + )] + fn _vrsqrte_u32(a: uint32x2_t) -> uint32x2_t; } + unsafe { _vrsqrte_u32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[doc = "Unsigned reciprocal square root estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"] #[inline] 
-#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -52697,66 +52350,86 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ursqrte.v4i32" + )] + fn _vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t; + } + unsafe { _vrsqrteq_u32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature 
= "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { - unsafe { - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f16" + )] + fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; } + unsafe { _vrsqrts_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn 
vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v8f16" + )] + fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vrsqrtsq_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), @@ -52766,22 +52439,26 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { - unsafe { - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v2f32" + )] + fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } + unsafe { 
_vrsqrts_f32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), @@ -52791,20 +52468,28 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f32" + )] + fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vrsqrtsq_f32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] 
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52813,23 +52498,21 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshr_n_s8::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since 
= "1.59.0") @@ -52838,20 +52521,21 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshrq_n_s8::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52860,24 +52544,21 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshr_n_s16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52886,20 +52567,21 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshrq_n_s16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52908,25 +52590,21 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshr_n_s32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52935,20 +52613,21 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshrq_n_s32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52957,23 +52636,21 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshr_n_s64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -52982,20 +52659,21 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshrq_n_s64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 
2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53004,23 +52682,21 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshr_n_u8::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53029,20 +52705,21 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshrq_n_u8::(b)) } } -#[doc 
= "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53051,24 +52728,21 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshr_n_u16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53077,20 +52751,21 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshrq_n_u16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53099,23 +52774,21 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> 
poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshr_n_u32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53124,20 +52797,21 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshrq_n_u32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[doc = "Unsigned rounding shift right 
and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53146,23 +52820,21 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshr_n_u64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53171,19 +52843,19 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshrq_n_u64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -53193,23 +52865,26 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + unsafe extern 
"unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v8i8" + )] + fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; } + unsafe { _vrsubhn_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -53219,19 +52894,26 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v4i16" + )] + fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + } + unsafe { _vrsubhn_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -53241,22 +52923,26 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) +pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v2i32" + )] + fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; } + unsafe { _vrsubhn_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -53266,19 +52952,18 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -53288,22 +52973,18 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn 
vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -53313,46 +52994,101 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] 
+#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] -pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch 
= "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53361,20 +53097,22 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53383,25 +53121,26 @@ pub fn vreinterpretq_p64_u8(a: 
uint8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { +pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53410,20 +53149,22 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } 
-#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53432,23 +53173,26 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { +pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] #[inline] #[cfg(target_endian = "little")] 
-#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53457,20 +53201,22 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ 
-53479,23 +53225,26 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { +pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53504,20 +53253,22 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } } 
-#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53526,24 +53277,31 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { +pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53552,20 +53310,22 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + 
assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53574,23 +53334,26 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { +pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53599,20 +53362,22 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { - unsafe { transmute(a) } +pub fn 
vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53621,23 +53386,26 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { +pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53646,20 +53414,22 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, 
LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53668,24 +53438,26 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { +pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53694,20 +53466,22 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { - unsafe { transmute(a) } +pub 
fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53716,23 +53490,26 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { +pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53741,20 +53518,22 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, 
LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53763,23 +53542,26 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { +pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53788,20 +53570,22 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { - unsafe { transmute(a) } +pub fn vset_lane_u8(a: u8, b: 
uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53810,24 +53594,26 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { +pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53836,20 +53622,22 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, 
LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53858,25 +53646,31 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { +pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53885,20 +53679,22 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> 
poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53907,23 +53703,26 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { +pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53932,20 +53731,22 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53954,23 +53755,26 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { +pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53979,20 +53783,22 @@ pub fn 
vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54001,24 +53807,26 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { +pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector 
reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54027,20 +53835,22 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54049,23 +53859,26 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { +pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0") @@ -54074,20 +53887,22 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54096,23 +53911,26 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { +pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } 
-#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54121,20 +53939,22 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54143,23 +53963,26 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { +pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0") @@ -54168,20 +53991,22 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54190,23 +54015,31 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { +pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_insert!(b, LANE as 
u32, a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54215,20 +54048,22 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54237,23 +54072,26 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { +pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let ret_val: uint16x4_t = transmute(a); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54262,20 +54100,22 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54284,45 +54124,25 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { +pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_insert!(b, 
LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54331,23 +54151,21 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - unsafe { 
- let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54356,20 +54174,21 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"] #[inline] 
-#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54378,23 +54197,22 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0") @@ -54403,20 +54221,22 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { - unsafe { transmute(a) } +pub fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54425,439 +54245,809 @@ pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { +pub fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] +#[doc = "SHA1 hash update accelerator, choose."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1c))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1c" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")] + fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1cq_u32(hash_abcd, hash_e, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] +#[doc = "SHA1 hash update accelerator, choose."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] 
+#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1c))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { +pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1c" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")] + fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1cq_u32(hash_abcd, hash_e, wk); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] +#[doc = "SHA1 fixed rotate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] 
+#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(test, assert_instr(sha1h))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { - unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vsha1h_u32(hash_e: u32) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")] + fn _vsha1h_u32(hash_e: u32) -> u32; } + unsafe { _vsha1h_u32(hash_e) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] +#[doc = "SHA1 hash update accelerator, majority"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1m))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1m" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")] + fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1mq_u32(hash_abcd, hash_e, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] +#[doc = "SHA1 hash update accelerator, majority"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] 
+#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1m))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { +pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1m" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")] + fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1mq_u32(hash_abcd, hash_e, wk); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[doc = "SHA1 hash update accelerator, parity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = 
"v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1p))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1p" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")] + fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1pq_u32(hash_abcd, hash_e, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[doc = "SHA1 hash update accelerator, parity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1p))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = 
"neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { +pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1p" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")] + fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1pq_u32(hash_abcd, hash_e, wk); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = 
"arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")] + fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1su0q_u32(w0_3, w4_7, w8_11) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { +pub fn vsha1su0q_u32(w0_3: 
uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")] + fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let w0_3: uint32x4_t = simd_shuffle!(w0_3, w0_3, [3, 2, 1, 0]); + let w4_7: uint32x4_t = simd_shuffle!(w4_7, w4_7, [3, 2, 1, 0]); + let w8_11: uint32x4_t = simd_shuffle!(w8_11, w8_11, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1su0q_u32(w0_3, w4_7, w8_11); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] +#[doc = "SHA1 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn 
vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")] + fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1su1q_u32(tw0_3, w12_15) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] +#[doc = "SHA1 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { +pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")] + fn _vsha1su1q_u32(tw0_3: 
uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); + let tw0_3: uint32x4_t = simd_shuffle!(tw0_3, tw0_3, [3, 2, 1, 0]); + let w12_15: uint32x4_t = simd_shuffle!(w12_15, w12_15, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1su1q_u32(tw0_3, w12_15); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] +#[doc = "SHA256 hash update accelerator, upper part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256h2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h2" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")] + fn _vsha256h2q_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, +
wk: uint32x4_t, + ) -> uint32x4_t; + } + unsafe { _vsha256h2q_u32(hash_abcd, hash_efgh, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] +#[doc = "SHA1 schedule update accelerator, upper part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256h2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { +pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h2" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")] + fn _vsha256h2q_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let 
hash_efgh: uint32x4_t = simd_shuffle!(hash_efgh, hash_efgh, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha256h2q_u32(hash_abcd, hash_efgh, wk); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256h))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] +pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")] + fn _vsha256hq_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } + unsafe { _vsha256hq_u32(hash_abcd, hash_efgh, wk) } +} +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = 
"sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256h))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { - unsafe { transmute(a) } +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")] + fn _vsha256hq_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } + unsafe { + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let hash_efgh: uint32x4_t = simd_shuffle!(hash_efgh, hash_efgh, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha256hq_u32(hash_abcd, hash_efgh, wk); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] +#[doc = "SHA256 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256su0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] +pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")] + fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha256su0q_u32(w0_3, w4_7) } +} +#[doc = "SHA256 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256su0))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")] + fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let w0_3: uint32x4_t = simd_shuffle!(w0_3, w0_3, [3, 2, 1, 0]); + let w4_7: uint32x4_t = simd_shuffle!(w4_7, w4_7, [3, 2, 
1, 0]); + let ret_val: uint32x4_t = _vsha256su0q_u32(w0_3, w4_7); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"] +#[doc = "SHA256 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256su1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] +pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")] + fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) + -> uint32x4_t; + } + unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) } +} +#[doc = "SHA256 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
+#[cfg_attr(test, assert_instr(sha256su1))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")] + fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) + -> uint32x4_t; + } + unsafe { + let tw0_3: uint32x4_t = simd_shuffle!(tw0_3, tw0_3, [3, 2, 1, 0]); + let w8_11: uint32x4_t = simd_shuffle!(w8_11, w8_11, [3, 2, 1, 0]); + let w12_15: uint32x4_t = simd_shuffle!(w12_15, w12_15, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha256su1q_u32(tw0_3, w8_11, w12_15); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] + fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] + fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + } + unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] + fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] + fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i16(a: int16x4_t, b: int16x4_t) 
-> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] + fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] + fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] + fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] + fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 
8]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] + fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] + fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + } + unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe 
extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] + fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] + fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] + fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] + fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] + fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] + fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) } +} +#[doc = 
"Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54866,19 +55056,21 @@ pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdup_n_s8(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54887,19 +55079,21 @@ pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { 
simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdupq_n_s8(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54908,19 +55102,21 @@ pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +pub fn vshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdup_n_s16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54929,19 +55125,21 @@ pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +pub fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdupq_n_s16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54950,19 +55148,21 @@ pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +pub fn vshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdup_n_s32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54971,19 +55171,21 @@ pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdupq_n_s32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54992,19 +55194,21 @@ pub fn 
vrev32_p16(a: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdup_n_s64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55013,19 +55217,21 @@ pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdupq_n_s64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55034,19 +55240,21 @@ pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdup_n_u8(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55055,19 +55263,21 @@ pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdupq_n_u8(N as _)) 
} } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55076,19 +55286,21 @@ pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdup_n_u16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55097,19 +55309,21 @@ pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdupq_n_u16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55118,19 +55332,21 @@ pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +pub fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdup_n_u32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"] +#[doc = "Shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55139,19 +55355,21 @@ pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdupq_n_u32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55160,19 +55378,21 @@ pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +pub fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdup_n_u64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55181,18 +55401,19 @@ pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdupq_n_u64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55202,18 +55423,26 @@ pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i8" + )] + fn _vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vshl_s8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55223,18 +55452,26 @@ pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, a, [1, 0]) } +pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v16i8" + )] + fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vshlq_s8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55244,18 +55481,26 @@ pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v4i16" + )] + fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vshl_s16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55265,18 +55510,26 @@ pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i16" + )] + fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vshlq_s16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55286,18 +55539,26 @@ pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { - unsafe { 
simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v2i32" + )] + fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vshl_s32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55307,18 +55568,26 @@ pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, a, [1, 0]) } +pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v4i32" + )] + fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vshlq_s32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55328,18 +55597,26 @@ pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v1i64" + )] + fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vshl_s64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55349,18 +55626,26 @@ pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v2i64" + )] + fn _vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vshlq_s64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55370,18 +55655,26 @@ pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, a, [1, 0]) } +pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v8i8" + )] + fn _vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vshl_u8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55391,18 +55684,26 @@ pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v16i8" + )] + fn _vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vshlq_u8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] 
#[cfg_attr( not(target_arch = "arm"), @@ -55412,18 +55713,26 @@ pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v4i16" + )] + fn _vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } + unsafe { _vshl_u16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55433,18 +55742,26 @@ pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v8i16" + )] + fn 
_vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vshlq_u16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55454,18 +55771,26 @@ pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v2i32" + )] + fn _vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } + unsafe { _vshl_u32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55475,18 +55800,26 @@ pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v4i32" + )] + fn _vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vshlq_u32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55496,18 +55829,26 @@ pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v1i64" + )] + fn _vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vshl_u64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -55517,19 +55858,28 @@ pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v2i64" + )] + fn _vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vshlq_u64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55538,19 +55888,21 @@ pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshll_n_s16(a: int16x4_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 16); + unsafe { simd_shl(simd_cast(a), vdupq_n_s32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55559,19 +55911,21 @@ pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature =
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshll_n_s32(a: int32x2_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 32); + unsafe { simd_shl(simd_cast(a), vdupq_n_s64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55580,63 +55934,67 @@ pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +pub fn vshll_n_s8(a: int8x8_t) -> int16x8_t { + static_assert!(N >= 0 && N <= 8); + unsafe { simd_shl(simd_cast(a), vdupq_n_s16(N as _)) } } -#[doc = "Reverse elements in 64-bit doublewords"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] +#[doc = "Unsigned shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable =
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushll, N = 2) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrev64_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 16); + unsafe { simd_shl(simd_cast(a), vdupq_n_u32(N as _)) } } -#[doc = "Reverse elements in 64-bit doublewords"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] +#[doc = "Unsigned shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushll, N = 2) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn
vrev64q_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 32); + unsafe { simd_shl(simd_cast(a), vdupq_n_u64(N as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(ushll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55645,27 +56003,21 @@ pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] - fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vrhadd_s8(a, b) } +pub fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t { + static_assert!(N >= 0 && N <= 8); + unsafe { simd_shl(simd_cast(a), vdupq_n_u16(N as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"] +#[doc = "Shift right"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55674,27 +56026,22 @@ pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] - fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vrhaddq_s8(a, b) } +pub fn vshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + unsafe { simd_shr(a, vdup_n_s8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + 
assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55703,27 +56050,22 @@ pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] - fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vrhadd_s16(a, b) } +pub fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + unsafe { simd_shr(a, vdupq_n_s8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55732,27 +56074,22 @@ pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - 
any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] - fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vrhaddq_s16(a, b) } +pub fn vshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + unsafe { simd_shr(a, vdup_n_s16(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55761,27 +56098,22 @@ pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] - fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vrhadd_s32(a, b) } +pub fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + unsafe { simd_shr(a, vdupq_n_s16(n 
as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55790,27 +56122,22 @@ pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] - fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vrhaddq_s32(a, b) } +pub fn vshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + unsafe { simd_shr(a, vdup_n_s32(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55819,27 +56146,22 @@ pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] - fn _vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vrhadd_u8(a, b) } +pub fn vshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + unsafe { simd_shr(a, vdupq_n_s32(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55848,27 +56170,22 @@ pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> 
uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] - fn _vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } - unsafe { _vrhaddq_u8(a, b) } +pub fn vshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + unsafe { simd_shr(a, vdup_n_s64(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55877,27 +56194,22 @@ pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] - fn _vrhadd_u16(a: uint16x4_t, b: 
uint16x4_t) -> uint16x4_t; - } - unsafe { _vrhadd_u16(a, b) } +pub fn vshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + unsafe { simd_shr(a, vdupq_n_s64(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55906,27 +56218,26 @@ pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] - fn _vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } - unsafe { _vrhaddq_u16(a, b) } +pub fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { + return vdup_n_u8(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"] +#[doc = "Shift right"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55935,27 +56246,26 @@ pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] - fn _vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vrhadd_u32(a, b) } +pub fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { + return vdupq_n_u8(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] #[cfg_attr( all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55964,85 +56274,82 @@ pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] - fn _vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } - unsafe { _vrhaddq_u32(a, b) } +pub fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { + return vdup_n_u16(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u16(n as _)) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) + assert_instr(ushr, N = 2) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = 
"neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrndn_f16(a: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v4f16" - )] - fn _vrndn_f16(a: float16x4_t) -> float16x4_t; - } - unsafe { _vrndn_f16(a) } +pub fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { + return vdupq_n_u16(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u16(n as _)) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) + assert_instr(ushr, N = 2) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = 
"arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v8f16" - )] - fn _vrndnq_f16(a: float16x8_t) -> float16x8_t; - } - unsafe { _vrndnq_f16(a) } +pub fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { + return vdup_n_u32(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u32(n as _)) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56051,26 +56358,26 @@ pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrndn_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v2f32" - )] - fn _vrndn_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrndn_f32(a) } +pub fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { + return vdupq_n_u32(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u32(n as _)) } } -#[doc = "Floating-point round to integral, to 
nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56079,26 +56386,26 @@ pub fn vrndn_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v4f32" - )] - fn _vrndnq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrndnq_f32(a) } +pub fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { + return vdup_n_u64(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u64(n as _)) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56107,27 +56414,26 @@ pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v8i8" - )] - fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vrshl_s8(a, b) } +pub fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { + return vdupq_n_u64(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u64(n as _)) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56136,27 +56442,21 @@ pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t 
{ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v16i8" - )] - fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vrshlq_s8(a, b) } +pub fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_cast(simd_shr(a, vdupq_n_s16(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56165,27 +56465,21 @@ pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v4i16" - )] - fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vrshl_s16(a, b) } +pub 
fn vshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_cast(simd_shr(a, vdupq_n_s32(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56194,27 +56488,21 @@ pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v8i16" - )] - fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vrshlq_s16(a, b) } +pub fn vshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_cast(simd_shr(a, vdupq_n_s64(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"] #[inline] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56223,27 +56511,21 @@ pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v2i32" - )] - fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vrshl_s32(a, b) } +pub fn vshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_cast(simd_shr(a, vdupq_n_u16(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ 
-56252,27 +56534,21 @@ pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v4i32" - )] - fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vrshlq_s32(a, b) } +pub fn vshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_cast(simd_shr(a, vdupq_n_u32(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56281,27 +56557,261 @@ pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v1i64" - )] - fn 
_vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } - unsafe { _vrshl_s64(a, b) } +pub fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_cast(simd_shr(a, vdupq_n_u64(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + vshiftlins_v8i8::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + vshiftlins_v16i8::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + 
vshiftlins_v4i16::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + vshiftlins_v8i16::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 0 && N <= 31); + vshiftlins_v2i32::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 31); + vshiftlins_v4i32::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 0 && N <= 63); + vshiftlins_v1i64::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 63); + vshiftlins_v2i64::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left 
and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vshiftlins_v2i32::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = 
"neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vshiftlins_v4i32::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vshiftlins_v1i64::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vshiftlins_v2i64::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub 
fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +} +#[doc = "Signed shift right and accumulate"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56310,27 +56820,21 @@ pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v2i64" - )] - fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } - unsafe { _vrshlq_s64(a, b) } +pub fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshr_n_s8::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + 
assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56339,27 +56843,21 @@ pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v8i8" - )] - fn _vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } - unsafe { _vrshl_u8(a, b) } +pub fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshrq_n_s8::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56368,27 +56866,21 @@ pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrshiftu.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v16i8" - )] - fn _vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } - unsafe { _vrshlq_u8(a, b) } +pub fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshr_n_s16::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56397,27 +56889,21 @@ pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v4i16" - )] - fn _vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } - unsafe { _vrshl_u16(a, b) } +pub fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshrq_n_s16::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56426,27 +56912,21 @@ pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v8i16" - )] - fn _vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } - unsafe { _vrshlq_u16(a, b) } +pub fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vshr_n_s32::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56455,27 +56935,21 @@ pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v2i32" - )] - fn _vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } - unsafe { _vrshl_u32(a, b) } +pub fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vshrq_n_s32::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56484,27 +56958,21 @@ pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v4i32" - )] - fn _vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } - unsafe { _vrshlq_u32(a, b) } +pub fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vshr_n_s64::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56513,57 +56981,21 @@ pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v1i64" - )] - fn _vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vrshl_u64(a, b) } -} -#[doc = "Unsigned 
rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v2i64" - )] - fn _vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vrshlq_u64(a, b) } +pub fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vshrq_n_s64::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature 
= "neon_intrinsics", since = "1.59.0") @@ -56572,21 +57004,21 @@ pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { +pub fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert!(N >= 1 && N <= 8); - vrshl_s8(a, vdup_n_s8(-N as _)) + unsafe { simd_add(a, vshr_n_u8::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56595,21 +57027,21 @@ pub fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { +pub fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - vrshlq_s8(a, vdupq_n_s8(-N as _)) + unsafe { simd_add(a, vshrq_n_u8::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56618,21 +57050,21 @@ pub fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { +pub fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert!(N >= 1 && N <= 16); - vrshl_s16(a, vdup_n_s16(-N as _)) + unsafe { simd_add(a, vshr_n_u16::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56641,21 +57073,21 @@ pub fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { +pub fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - vrshlq_s16(a, vdupq_n_s16(-N as _)) + unsafe { simd_add(a, vshrq_n_u16::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56664,21 +57096,21 @@ pub fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { +pub fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert!(N >= 1 && N <= 32); - vrshl_s32(a, vdup_n_s32(-N as _)) + unsafe { simd_add(a, vshr_n_u32::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56687,21 +57119,21 @@ pub fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { +pub fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - vrshlq_s32(a, vdupq_n_s32(-N as _)) + unsafe { simd_add(a, vshrq_n_u32::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56710,21 +57142,21 @@ pub fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { +pub fn vsra_n_u64(a: uint64x1_t, 
b: uint64x1_t) -> uint64x1_t { static_assert!(N >= 1 && N <= 64); - vrshl_s64(a, vdup_n_s64(-N as _)) + unsafe { simd_add(a, vshr_n_u64::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56733,1128 +57165,1131 @@ pub fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s64(a: int64x2_t) -> int64x2_t { +pub fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert!(N >= 1 && N <= 64); - vrshlq_s64(a, vdupq_n_s64(-N as _)) + unsafe { simd_add(a, vshrq_n_u64::(b)) } } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - vrshl_u8(a, vdup_n_s8(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(1 <= N && N <= 8); + vshiftrins_v8i8::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - vrshlq_u8(a, vdupq_n_s8(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] 
+#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(1 <= N && N <= 8); + vshiftrins_v16i8::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - vrshl_u16(a, vdup_n_s16(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(1 <= N && N <= 16); + vshiftrins_v4i16::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - vrshlq_u16(a, vdupq_n_s16(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(1 <= N && N <= 16); + vshiftrins_v8i16::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - vrshl_u32(a, vdup_n_s32(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(1 <= N && N <= 32); + vshiftrins_v2i32::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - vrshlq_u32(a, vdupq_n_s32(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(1 <= N && N <= 32); + vshiftrins_v4i32::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] #[inline] 
-#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - vrshl_u64(a, vdup_n_s64(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(1 <= N && N <= 64); + vshiftrins_v1i64::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - 
vrshlq_u64(a, vdupq_n_s64(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(1 <= N && N <= 64); + vshiftrins_v2i64::(a, b) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")] - fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; - } - unsafe { _vrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vrshrn, 
N = 2))] -#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")] - fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; - } - unsafe { _vrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")] - fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; - } - unsafe { _vrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rshrn.v8i8" - )] - fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; - } - unsafe { _vrshrn_n_s16(a, N) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.rshrn.v4i16" - )] - fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; - } - unsafe { _vrshrn_n_s32(a, N) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(1 <= N && N <= 32); + unsafe { transmute(vshiftrins_v2i32::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rshrn.v2i32" - )] - fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; - } - unsafe { _vrshrn_n_s64(a, N) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(1 <= N && N <= 32); + unsafe { transmute(vshiftrins_v4i32::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rshrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vrshrn_n_s16::(transmute(a))) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(1 <= N && N <= 64); + unsafe { transmute(vshiftrins_v1i64::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn 
vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(1 <= N && N <= 64); + unsafe { transmute(vshiftrins_v2i64::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert 
(immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rshrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vrshrn_n_s32::(transmute(a))) } +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + vst1_v4f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rshrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { transmute(vrshrn_n_s64::(transmute(a))) } +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + vst1q_v8f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] #[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t { +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v4f16" - )] - fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")] + fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t); } - unsafe { _vrsqrte_f16(a) } + _vst1_f16_x2(a, b.0, b.1) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] #[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t { +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v8f16" - )] - fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")] + fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t); } - unsafe { _vrsqrteq_f16(a) } + _vst1q_f16_x2(a, b.0, b.1) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { +#[cfg(not(target_arch = "arm"))] 
+#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v2f32" + link_name = "llvm.aarch64.neon.st1x2.v4f16.p0" )] - fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t; + fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16); } - unsafe { _vrsqrte_f32(a) } + _vst1_f16_x2(b.0, b.1, a) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v4f32" + link_name = "llvm.aarch64.neon.st1x2.v8f16.p0" )] - fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t; + fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16); } - unsafe { _vrsqrteq_f32(a) } + _vst1q_f16_x2(b.0, b.1, a) } -#[doc = "Unsigned reciprocal square root estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t { +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ursqrte.v2i32" - )] - fn _vrsqrte_u32(a: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vst1x3.p0.v4f16")] + fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t); } - unsafe { _vrsqrte_u32(a) } + _vst1_f16_x3(a, b.0, b.1, b.2) } -#[doc = "Unsigned reciprocal square root estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ursqrte.v4i32" - )] - fn _vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8f16")] + fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t); } - unsafe { _vrsqrteq_u32(a) } + _vst1q_f16_x3(a, b.0, b.1, b.2) } -#[doc = "Floating-point reciprocal square root 
step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] #[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v4f16" + link_name = "llvm.aarch64.neon.st1x3.v4f16.p0" )] - fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16); } - unsafe { _vrsqrts_f16(a, b) } + _vst1_f16_x3(b.0, b.1, b.2, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] #[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v8f16" + link_name = "llvm.aarch64.neon.st1x3.v8f16.p0" )] - fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16); } - unsafe { _vrsqrtsq_f16(a, b) } + _vst1q_f16_x3(b.0, b.1, b.2, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v2f32" - )] - fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f16")] + fn _vst1_f16_x4( + ptr: *mut f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ); } - unsafe { _vrsqrts_f32(a, b) } + _vst1_f16_x4(a, b.0, b.1, b.2, b.3) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8f16")] + fn _vst1q_f16_x4( + ptr: *mut f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ); + } + _vst1q_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v4f32" + link_name = "llvm.aarch64.neon.st1x4.v4f16.p0" )] - fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + fn _vst1_f16_x4( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ptr: *mut f16, + ); } - unsafe { _vrsqrtsq_f32(a, b) } + _vst1_f16_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Signed 
rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshr_n_s8::(b)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8f16.p0" + )] + fn _vst1q_f16_x4( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ptr: *mut f16, + ); + } + _vst1q_f16_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshrq_n_s8::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2f32::(ptr as *const i8, transmute(a)) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = 
"arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshr_n_s16::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4f32::(ptr as *const i8, transmute(a)) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshrq_n_s16::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { + const ALIGN: i32 = 
crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshr_n_s32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
+#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshrq_n_s32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s64(a: 
int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshr_n_s64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshrq_n_s64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, a) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshr_n_u8::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, a) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshrq_n_u8::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, a) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshr_n_u16::(b)) } +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, a) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshrq_n_u16::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshr_n_u32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - 
not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshrq_n_u32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshr_n_u64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { 
+ const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshrq_n_u64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, transmute(a)) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] 
#[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")] + fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t); + } + _vst1_f32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")] + fn _vst1q_f32_x2(ptr: *mut f32, a: 
float32x4_t, b: float32x4_t); + } + _vst1q_f32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rsubhn.v8i8" + link_name = "llvm.aarch64.neon.st1x2.v2f32.p0" )] - fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; + fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32); } - unsafe { _vrsubhn_s16(a, b) } + _vst1_f32_x2(b.0, b.1, a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_s32(a: int32x4_t, b: 
int32x4_t) -> int16x4_t { +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rsubhn.v4i16" + link_name = "llvm.aarch64.neon.st1x2.v4f32.p0" )] - fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32); } - unsafe { _vrsubhn_s32(a, b) } + _vst1q_f32_x2(b.0, b.1, a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2f32.p0" + )] + fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32); + } + _vst1_f32_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, 
assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4f32.p0" + )] + fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32); + } + _vst1q_f32_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2f32.p0")] + fn _vst1_f32_x4( + ptr: *mut f32, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + ); + } + _vst1_f32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f32.p0")] + fn _vst1q_f32_x4( + ptr: *mut f32, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + ); + } + _vst1q_f32_x4(a, b.0, b.1, b.2, b.3) +} 
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2f32.p0" + )] + fn _vst1_f32_x4( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + ptr: *mut f32, + ); + } + _vst1_f32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4f32.p0" + )] + fn _vst1q_f32_x4( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + ptr: *mut f32, + ); + } + _vst1q_f32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_lane_f16(a: *mut f16, b: float16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] -pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rsubhn.v2i32" - )] - fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; - } - unsafe { _vrsubhn_s64(a, b) } +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = 
"arm64ec"))] +pub unsafe fn vst1q_lane_f16(a: *mut f16, b: float16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57863,20 +58298,23 @@ pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) } +pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon 
intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57885,25 +58323,23 @@ pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vrsubhn_s16(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57912,20 +58348,23 @@ pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) } +pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57934,25 +58373,23 @@ pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint32x4_t = simd_shuffle!(b, 
b, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(vrsubhn_s32(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { + static_assert_uimm_bits!(LANE, 4); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57961,72 +58398,14 @@ pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) } -} -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); - let ret_val: uint32x2_t = transmute(vrsubhn_s64(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { +pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } -} -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = 
"136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58044,12 +58423,14 @@ pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58067,12 +58448,14 @@ pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub 
fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58090,12 +58473,14 @@ pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58113,12 +58498,14 @@ pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") )] -pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(LANE, 4); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58136,12 +58523,14 @@ pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58159,12 +58548,14 @@ pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { + static_assert_uimm_bits!(LANE, 4); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58182,12 +58573,14 @@ pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58205,12 +58598,14 @@ pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58228,12 +58623,14 @@ pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { +pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58251,12 +58648,14 @@ pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58274,12 +58673,14 @@ pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(LANE, 4); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58297,12 +58698,14 @@ pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58320,12 +58723,14 @@ pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { + static_assert_uimm_bits!(LANE, 4); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58343,12 +58748,14 @@ pub fn vsetq_lane_u16(a: u16, b: 
uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58366,15 +58773,17 @@ pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -58389,12 +58798,14 @@ pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58412,12 +58823,14 @@ pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58435,21 +58848,22 @@ pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(LANE, 4); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st1) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -58458,21 +58872,21 @@ pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> 
poly16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st1) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -58481,21 +58895,21 @@ pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st1) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -58504,21 +58918,21 @@ pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { - static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + 
assert_instr(st1) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -58527,21 +58941,21 @@ pub fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { - static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st1) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -58550,21 +58964,21 @@ pub fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { - static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { + 
vst1q_s64_x3(transmute(a), transmute(b)) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st1) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -58573,504 +58987,861 @@ pub fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) } -#[doc = "SHA1 hash update accelerator, choose."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, 
assert_instr(sha1c))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1c" + link_name = "llvm.aarch64.neon.st1x2.v8i8.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")] - fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8); } - unsafe { _vsha1cq_u32(hash_abcd, hash_e, wk) } + _vst1_s8_x2(b.0, b.1, a) } -#[doc = "SHA1 fixed rotate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1h))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1h_u32(hash_e: u32) -> u32 { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] 
+#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1h" + link_name = "llvm.aarch64.neon.st1x2.v16i8.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")] - fn _vsha1h_u32(hash_e: u32) -> u32; + fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8); } - unsafe { _vsha1h_u32(hash_e) } + _vst1q_s8_x2(b.0, b.1, a) } -#[doc = "SHA1 hash update accelerator, majority"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1m))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1m" + link_name = "llvm.aarch64.neon.st1x2.v4i16.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")] - fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + fn 
_vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16); } - unsafe { _vsha1mq_u32(hash_abcd, hash_e, wk) } + _vst1_s16_x2(b.0, b.1, a) } -#[doc = "SHA1 hash update accelerator, parity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1p))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1p" + link_name = "llvm.aarch64.neon.st1x2.v8i16.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")] - fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, ptr: *mut i16); } - unsafe { _vsha1pq_u32(hash_abcd, hash_e, wk) } + _vst1q_s16_x2(b.0, b.1, a) } -#[doc = "SHA1 schedule update accelerator, first part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, 
or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1su0))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1su0" + link_name = "llvm.aarch64.neon.st1x2.v2i32.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")] - fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; + fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32); } - unsafe { _vsha1su0q_u32(w0_3, w4_7, w8_11) } + _vst1_s32_x2(b.0, b.1, a) } -#[doc = "SHA1 schedule update accelerator, second part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1su1))] -#[cfg_attr( - 
target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1su1" + link_name = "llvm.aarch64.neon.st1x2.v4i32.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")] - fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32); } - unsafe { _vsha1su1q_u32(tw0_3, w12_15) } + _vst1q_s32_x2(b.0, b.1, a) } -#[doc = "SHA1 schedule update accelerator, upper part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256h2))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256h2" + link_name = "llvm.aarch64.neon.st1x2.v1i64.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")] - fn _vsha256h2q_u32( - hash_abcd: uint32x4_t, - hash_efgh: uint32x4_t, - wk: uint32x4_t, - ) -> uint32x4_t; + fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64); } - unsafe { _vsha256h2q_u32(hash_abcd, hash_efgh, wk) } + _vst1_s64_x2(b.0, b.1, a) } -#[doc = "SHA1 schedule update accelerator, first part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256h))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256h" + link_name = 
"llvm.aarch64.neon.st1x2.v2i64.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")] - fn _vsha256hq_u32( - hash_abcd: uint32x4_t, - hash_efgh: uint32x4_t, - wk: uint32x4_t, - ) -> uint32x4_t; + fn _vst1q_s64_x2(a: int64x2_t, b: int64x2_t, ptr: *mut i64); } - unsafe { _vsha256hq_u32(hash_abcd, hash_efgh, wk) } + _vst1q_s64_x2(b.0, b.1, a) } -#[doc = "SHA256 schedule update accelerator, first part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256su0))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256su0" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")] - fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")] + fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t); } - unsafe { _vsha256su0q_u32(w0_3, w4_7) } + _vst1_s8_x2(a, 
b.0, b.1) } -#[doc = "SHA256 schedule update accelerator, second part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256su1))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256su1" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")] - fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) - -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")] + fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t); } - unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) } + _vst1q_s8_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] 
-#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] - fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")] + fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t); } - unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) } + _vst1_s16_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] - fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")] + fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t); } - unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) } + 
_vst1q_s16_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] - fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")] + fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t); } - unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) } + _vst1_s32_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] - fn 
_vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")] + fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t); } - unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) } + _vst1q_s32_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] - fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")] + fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t); } - unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) } + _vst1_s64_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
-#[rustc_legacy_const_generics(2)] -fn vshiftlins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] - fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")] + fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t); } - unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) } + _vst1q_s64_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] - fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i8.p0" + )] + fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); } - unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) } + _vst1_s8_x3(b.0, b.1, b.2, a) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] - fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v16i8.p0" + )] + fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); } - unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) } + _vst1q_s8_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, 
assert_instr(st1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] - fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i16.p0" + )] + fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); } - unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) } + _vst1_s16_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] - fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i16.p0" + )] + fn _vst1q_s16_x3(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16); } - unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) } + _vst1q_s16_x3(b.0, 
b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] - fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i32.p0" + )] + fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); } - unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) } + _vst1_s32_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] - fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i32.p0" + )] + fn _vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); } - unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) } + _vst1q_s32_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] - fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) 
-> int16x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v1i64.p0" + )] + fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); } - unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) } + _vst1_s64_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] - fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i64.p0" + )] + fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); } - unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) } + _vst1q_s64_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"] +#[doc = "Store multiple single-element structures from one, two, three, or 
four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] - fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i8.p0")] + fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); } - unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) } + _vst1_s8_x3(a, b.0, b.1, b.2) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { unsafe extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] - fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v16i8.p0")] + fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); } - unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) } + _vst1q_s8_x3(a, b.0, b.1, b.2) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i16.p0")] + fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); + } + _vst1_s16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i16.p0")] + fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: 
int16x8_t); + } + _vst1q_s16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i32.p0")] + fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); + } + _vst1_s32_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i32.p0")] + fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); + } + _vst1q_s32_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, 
assert_instr(vst1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v1i64.p0")] + fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); + } + _vst1_s64_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i64.p0")] + fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); + } + _vst1q_s64_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i8.p0" + )] + fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v16i8.p0" + )] + fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i16.p0" + )] + fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16); + } + _vst1_s16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i16.p0" + )] + fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2i32.p0" + )] + fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i32.p0" + )] + fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v1i64.p0" + )] + fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2i64.p0" + )] + fn _vst1q_s64_x4(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + unsafe 
extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i8.p0")] + fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t); + } + _vst1_s8_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v16i8.p0")] + fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t); + } + _vst1q_s8_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i16.p0")] + fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t); + } + _vst1_s16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " 
* Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i16.p0")] + fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t); + } + _vst1q_s16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i32.p0")] + fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t); + } + _vst1_s32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i32.p0")] + fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: 
int32x4_t); + } + _vst1q_s32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v1i64.p0")] + fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t); + } + _vst1_s64_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i64.p0")] + fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t); + } + _vst1q_s64_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59079,21 +59850,21 @@ fn vshiftrins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_s8(a: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdup_n_s8(N as _)) } +pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59102,21 +59873,21 @@ pub fn vshl_n_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdupq_n_s8(N as _)) } +pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) 
{ + vst1_s8_x3(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59125,21 +59896,21 @@ pub fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_s16(a: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdup_n_s16(N as _)) } +pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59148,21 +59919,21 @@ pub fn vshl_n_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdupq_n_s16(N as _)) } +pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59171,21 +59942,21 @@ pub fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_s32(a: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdup_n_s32(N as _)) } +pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59194,21 +59965,21 @@ pub fn vshl_n_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdupq_n_s32(N as _)) } +pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59217,21 +59988,21 @@ pub fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_s64(a: int64x1_t) -> int64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdup_n_s64(N as _)) } +pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59240,21 +60011,21 @@ pub fn vshl_n_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdupq_n_s64(N as _)) } -} -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"] +pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { + vst1_s16_x3(transmute(a), 
transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59263,21 +60034,21 @@ pub fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdup_n_u8(N as _)) } +pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] 
#[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59286,21 +60057,21 @@ pub fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdupq_n_u8(N as _)) } +pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59309,21 +60080,21 @@ pub fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdup_n_u16(N as _)) } +pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"] +#[doc = "Store multiple single-element structures to one, two, 
three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59332,21 +60103,21 @@ pub fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdupq_n_u16(N as _)) } +pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0") @@ -59355,21 +60126,21 @@ pub fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdup_n_u32(N as _)) } +pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { + vst1_s32_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59378,21 +60149,21 @@ pub fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdupq_n_u32(N as _)) } +pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { + vst1_s32_x3(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59401,21 +60172,21 @@ pub fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdup_n_u64(N as _)) } +pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { + vst1_s32_x4(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59424,19 +60195,20 @@ pub fn 
vshl_n_u64(a: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdupq_n_u64(N as _)) } +pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { + vst1q_s32_x2(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59446,26 +60218,20 @@ pub fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v8i8" - )] - fn _vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vshl_s8(a, b) } +pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { + vst1q_s32_x3(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"] +#[doc = "Store multiple single-element 
structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59475,26 +60241,20 @@ pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v16i8" - )] - fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vshlq_s8(a, b) } +pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { + vst1q_s32_x4(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59504,26 +60264,20 @@ pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v4i16" - )] - fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vshl_s16(a, b) } +pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59533,26 +60287,20 @@ pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.sshl.v8i16" - )] - fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vshlq_s16(a, b) } +pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59562,26 +60310,20 @@ pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v2i32" - )] - fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vshl_s32(a, b) } +pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59591,26 +60333,20 @@ pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v4i32" - )] - fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vshlq_s32(a, b) } +pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] 
#[cfg_attr( not(target_arch = "arm"), @@ -59620,26 +60356,20 @@ pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v1i64" - )] - fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } - unsafe { _vshl_s64(a, b) } +pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59649,26 +60379,20 @@ pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v2i64" - )] - fn _vshlq_s64(a: 
int64x2_t, b: int64x2_t) -> int64x2_t; - } - unsafe { _vshlq_s64(a, b) } +pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59678,26 +60402,20 @@ pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v8i8" - )] - fn _vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } - unsafe { _vshl_u8(a, b) } +pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"] +#[doc = "## 
Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59707,26 +60425,20 @@ pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v16i8" - )] - fn _vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } - unsafe { _vshlq_u8(a, b) } +pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59736,26 +60448,20 @@ pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { 
target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v4i16" - )] - fn _vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } - unsafe { _vshl_u16(a, b) } +pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59765,26 +60471,20 @@ pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v8i16" - )] - fn _vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } - unsafe { _vshlq_u16(a, b) } +pub unsafe fn vst1q_p8_x2(a: *mut p8, b: 
poly8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59794,26 +60494,20 @@ pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v2i32" - )] - fn _vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } - unsafe { _vshl_u32(a, b) } +pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59823,26 +60517,20 @@ pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v4i32" - )] - fn _vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } - unsafe { _vshlq_u32(a, b) } +pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59852,26 +60540,20 @@ pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v1i64" - )] - fn _vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vshl_u64(a, b) } +pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -59881,28 +60563,21 @@ pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v2i64" - )] - fn _vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vshlq_u64(a, b) } +pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) } -#[doc = "Signed 
shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59911,21 +60586,21 @@ pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_s16(a: int16x4_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 16); - unsafe { simd_shl(simd_cast(a), vdupq_n_s32(N as _)) } +pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59934,21 +60609,21 @@ pub fn vshll_n_s16(a: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_s32(a: int32x2_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 32); - unsafe { simd_shl(simd_cast(a), vdupq_n_s64(N as _)) } +pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59957,21 +60632,21 @@ pub fn vshll_n_s32(a: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_s8(a: int8x8_t) -> int16x8_t { - static_assert!(N >= 0 && N <= 8); - unsafe { simd_shl(simd_cast(a), vdupq_n_s16(N as _)) } +pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { + 
vst1q_s16_x3(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59980,287 +60655,198 @@ pub fn vshll_n_s8(a: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 16); - unsafe { simd_shl(simd_cast(a), vdupq_n_u32(N as _)) } +pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushll, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") 
-)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 32); - unsafe { simd_shl(simd_cast(a), vdupq_n_u64(N as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")] + fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); + } + _vst1_v1i64(addr, val, ALIGN) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushll, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t { - static_assert!(N >= 0 && N <= 8); - unsafe { simd_shl(simd_cast(a), vdupq_n_u16(N as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")] + fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: 
i32); + } + _vst1_v2f32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_s8(a: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { 7 } else { N }; - unsafe { simd_shr(a, vdup_n_s8(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")] + fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); + } + _vst1_v2i32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - 
target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { 7 } else { N }; - unsafe { simd_shr(a, vdupq_n_s8(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")] + fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); + } + _vst1_v4i16(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_s16(a: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { 15 } else { N }; - unsafe { simd_shr(a, vdup_n_s16(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")] + fn _vst1_v8i8(addr: *const i8, val: 
int8x8_t, align: i32); + } + _vst1_v8i8(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { 15 } else { N }; - unsafe { simd_shr(a, vdupq_n_s16(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")] + fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); + } + _vst1q_v16i8(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = 
"1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_s32(a: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { 31 } else { N }; - unsafe { simd_shr(a, vdup_n_s32(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")] + fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); + } + _vst1q_v2i64(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { 31 } else { N }; - unsafe { simd_shr(a, vdupq_n_s32(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vst1.v4f32.p0")] + fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); + } + _vst1q_v4f32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_s64(a: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { 63 } else { N }; - unsafe { simd_shr(a, vdup_n_s64(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")] + fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); + } + _vst1q_v4i32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( 
- not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { 63 } else { N }; - unsafe { simd_shr(a, vdupq_n_s64(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")] + fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); + } + _vst1q_v8i16(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { - return vdup_n_u8(0); - } else { - N - }; - unsafe { simd_shr(a, vdup_n_u8(n 
as _)) } +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")] + fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32); + } + _vst1_v4f16(addr, val, align) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")] + fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32); + } + _vst1q_v8f16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
+#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) + assert_instr(nop, LANE = 0) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60269,570 +60855,686 @@ pub fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { - return vdupq_n_u8(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u8(n as _)) } +pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { - return vdup_n_u16(0); - } else { - N - 
}; - unsafe { simd_shr(a, vdup_n_u16(n as _)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4f16.p0" + )] + fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8); + } + _vst2_f16(b.0, b.1, a as _) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { - return vdupq_n_u16(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u16(n as _)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f16(a: *mut 
f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8f16.p0" + )] + fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8); + } + _vst2q_f16(b.0, b.1, a as _) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { - return vdup_n_u32(0); - } else { - N - }; - unsafe { simd_shr(a, vdup_n_u32(n as _)) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v4f16")] + fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32); + } + _vst2_f16(a as _, b.0, b.1, 2) } -#[doc = "Shift right"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { - return vdupq_n_u32(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u32(n as _)) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v8f16")] + fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32); + } + _vst2q_f16(a as _, b.0, b.1, 2) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { - return vdup_n_u64(0); - } else { - N - }; - unsafe { simd_shr(a, vdup_n_u64(n as _)) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N 
<= 64); - let n: i32 = if N == 64 { - return vdupq_n_u64(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u64(n as _)) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_cast(simd_shr(a, vdupq_n_s16(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_cast(simd_shr(a, vdupq_n_s32(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_cast(simd_shr(a, vdupq_n_s64(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_cast(simd_shr(a, vdupq_n_u16(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_cast(simd_shr(a, vdupq_n_u32(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_cast(simd_shr(a, vdupq_n_u64(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - vshiftlins_v8i8::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")] + fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32); + } + _vst2_f32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] 
#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - vshiftlins_v16i8::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4f32.p0")] + fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32); + } + _vst2q_f32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); - vshiftlins_v4i16::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")] + fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32); + } + _vst2_s8(a as _, b.0, b.1, 1) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); - vshiftlins_v8i16::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")] + fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32); + } + _vst2q_s8(a as _, b.0, b.1, 1) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 0 && N <= 31); - vshiftlins_v2i32::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")] + fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32); + } + _vst2_s16(a as _, b.0, b.1, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 31); - vshiftlins_v4i32::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")] + fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32); + } + _vst2q_s16(a as _, b.0, b.1, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 0 && N <= 63); - vshiftlins_v1i64::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vst2.v2i32.p0")] + fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32); + } + _vst2_s32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 63); - vshiftlins_v2i64::(a, b) +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")] + fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32); + } + _vst2q_s32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -pub fn vsli_n_u8(a: 
uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f16.p0" + )] + fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8f16.p0" + )] + fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v4f16")] + fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32); + } + _vst2_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")] + fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32); + } + _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -pub fn 
vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vshiftlins_v2i32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2f32.p0" + )] + fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f32.p0" + )] + fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] 
+#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8i8.p0" + )] + fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s8(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4i16.p0" + )] + fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8i16.p0" + )] + fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2i32.p0" + )] + fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4i32.p0" + )] + fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vshiftlins_v4i32::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")] + fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); + } + _vst2_lane_f32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vshiftlins_v1i64::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800")] +pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")] + fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32); + } + _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vshiftlins_v2i64::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")] + fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32); + } + _vst2_lane_s8(a as _, b.0, b.1, LANE, 1) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")] + fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32); + } + _vst2_lane_s16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
+pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")] + fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32); + } + _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")] + fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32); + } + _vst2_lane_s32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] +#[doc = "## 
Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")] + fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32); + } + _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -60843,19 +61545,21 @@ pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_s8(a: int8x8_t, 
b: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshr_n_s8::(b)) } +pub unsafe fn vst2_lane_u8(a: *mut u8, b: uint8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -60866,19 +61570,21 @@ pub fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshrq_n_s8::(b)) } +pub unsafe fn vst2_lane_u16(a: *mut u16, b: uint16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -60889,19 +61595,21 @@ pub fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vshr_n_s16::(b)) } +pub unsafe fn vst2q_lane_u16(a: *mut u16, b: uint16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -60912,19 +61620,21 @@ pub fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { 
simd_add(a, vshrq_n_s16::(b)) } +pub unsafe fn vst2_lane_u32(a: *mut u32, b: uint32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -60935,19 +61645,21 @@ pub fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshr_n_s32::(b)) } +pub unsafe fn vst2q_lane_u32(a: *mut u32, b: uint32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2q_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -60958,19 +61670,21 @@ pub fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshrq_n_s32::(b)) } +pub unsafe fn vst2_lane_p8(a: *mut p8, b: poly8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -60981,19 +61695,21 @@ pub fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshr_n_s64::(b)) } +pub unsafe fn 
vst2_lane_p16(a: *mut p16, b: poly16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61004,21 +61720,22 @@ pub fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshrq_n_s64::(b)) } +pub unsafe fn vst2q_lane_p16(a: *mut p16, b: poly16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(nop) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61027,21 +61744,45 @@ pub fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshr_n_u8::(b)) } +pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { + vst2_s64(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr( 
+#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(nop) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61050,21 +61791,21 @@ pub fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshrq_n_u8::(b)) } +pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) { + vst2_s64(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61073,21 +61814,21 @@ pub fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vshr_n_u16::(b)) } +pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) { + vst2_s8(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61096,21 +61837,21 @@ pub fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vshrq_n_u16::(b)) } +pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) { + vst2q_s8(transmute(a), transmute(b)) } -#[doc = "Unsigned 
shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61119,21 +61860,21 @@ pub fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshr_n_u32::(b)) } +pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) { + vst2_s16(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61142,21 +61883,21 @@ pub fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshrq_n_u32::(b)) } +pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) { + vst2q_s16(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61165,21 +61906,21 @@ pub fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshr_n_u64::(b)) } +pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) { + vst2_s32(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right 
and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61188,1580 +61929,2158 @@ pub fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshrq_n_u64::(b)) } +pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) { + vst2q_s32(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s8(a: int8x8_t, b: 
int8x8_t) -> int8x8_t { - static_assert!(1 <= N && N <= 8); - vshiftrins_v8i8::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) { + vst2_s8(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(1 <= N && N <= 8); - vshiftrins_v16i8::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) { + 
vst2q_s8(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(1 <= N && N <= 16); - vshiftrins_v4i16::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) { + vst2_s16(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(1 <= N && N <= 16); - vshiftrins_v8i16::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { + vst2q_s16(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(1 <= N && N <= 32); - vshiftrins_v2i32::(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vst3.p0.v4f16")] + fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32); + } + _vst3_f16(a as _, b.0, b.1, b.2, 2) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(1 <= N && N <= 32); - vshiftrins_v4i32::(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8f16")] + fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32); + } + _vst3q_f16(a as _, b.0, b.1, b.2, 2) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] 
-#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(1 <= N && N <= 64); - vshiftrins_v1i64::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4f16.p0" + )] + fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8); + } + _vst3_f16(b.0, b.1, b.2, a as _) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(1 <= N && N <= 64); - vshiftrins_v2i64::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = 
"arm64ec"))] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8f16.p0" + )] + fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8); + } + _vst3q_f16(b.0, b.1, b.2, a as _) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] 
-#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] 
-#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(1 <= N && N <= 32); - unsafe { transmute(vshiftrins_v2i32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] 
-#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(1 <= N && N <= 32); - unsafe { transmute(vshiftrins_v4i32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(1 <= N && N <= 64); - unsafe { transmute(vshiftrins_v1i64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] 
-#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(1 <= N && N <= 64); - unsafe { transmute(vshiftrins_v2i64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { - vst1_v4f16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) 
{ + crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { - vst1q_v8f16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 2, 
3, a, b) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")] - fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f16")] + fn _vst3_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ); } - _vst1_f16_x2(a, b.0, b.1) + _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")] - fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8f16")] + fn _vst3q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ); } - _vst1q_f16_x2(a, b.0, b.1) + _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] 
-#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4f16.p0" + link_name = "llvm.aarch64.neon.st3lane.v4f16.p0" )] - fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16); + fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8); } - _vst1_f16_x2(b.0, b.1, a) + _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = 
"arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v8f16.p0" + link_name = "llvm.aarch64.neon.st3lane.v8f16.p0" )] - fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16); + fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8); } - _vst1q_f16_x2(b.0, b.1, a) + _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4f16")] - fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2f32")] + fn _vst3_lane_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i32, + size: i32, + ); } - _vst1_f16_x3(a, b.0, b.1, b.2) + _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or 
four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8f16")] - fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f32")] + fn _vst3q_lane_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i32, + size: i32, + ); } - _vst1q_f16_x3(a, b.0, b.1, b.2) + _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] 
-#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4f16.p0" - )] - fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i8")] + fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32); } - _vst1_f16_x3(b.0, b.1, b.2, a) + _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800")] +pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v8f16.p0" - )] - fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i16")] + fn _vst3_lane_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i32, + size: i32, + ); } - _vst1q_f16_x3(b.0, b.1, b.2, a) + _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f16")] - fn _vst1_f16_x4( - ptr: *mut f16, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i16")] + fn _vst3q_lane_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i32, + size: i32, ); } - _vst1_f16_x4(a, b.0, b.1, b.2, b.3) + _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8f16")] - fn _vst1q_f16_x4( - ptr: *mut f16, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2i32")] + fn _vst3_lane_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i32, + size: i32, ); } - _vst1q_f16_x4(a, b.0, b.1, b.2, b.3) + _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures to one, two, 
three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i32")] + fn _vst3q_lane_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4f16.p0" + link_name = 
"llvm.aarch64.neon.st3lane.v2f32.p0" )] - fn _vst1_f16_x4( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - ptr: *mut f16, - ); + fn _vst3_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8); } - _vst1_f16_x4(b.0, b.1, b.2, b.3, a) + _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v8f16.p0" + link_name = "llvm.aarch64.neon.st3lane.v4f32.p0" )] - fn _vst1q_f16_x4( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - ptr: *mut f16, - ); + fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8); } - _vst1q_f16_x4(b.0, b.1, b.2, b.3, a) + _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v2f32::(ptr as *const i8, transmute(a)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8i8.p0" + )] + fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v4f32::(ptr as *const i8, transmute(a)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4i16.p0" + )] + fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v8i8::(ptr as *const i8, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] 
-#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v16i8::(ptr as *const i8, a) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8i16.p0" + )] + fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v4i16::(ptr as *const i8, a) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] +pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2i32.p0" + )] + fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v8i16::(ptr as *const i8, a) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4i32.p0" + )] + fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v2i32::(ptr as *const i8, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u8(a: *mut u8, b: uint8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v4i32::(ptr as *const i8, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u16(a: *mut u16, b: uint16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v1i64::(ptr as *const i8, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_u16(a: *mut u16, b: uint16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v2i64::(ptr as *const i8, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u32(a: *mut u32, b: uint32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v8i8::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_u32(a: *mut u32, b: uint32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3q_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - const ALIGN: i32 = 
crate::mem::align_of::() as i32; - vst1q_v16i8::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_p8(a: *mut p8, b: poly8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v4i16::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vst3_lane_p16(a: *mut p16, b: poly16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v8i16::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_p16(a: *mut p16, b: poly16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"] #[doc = 
"## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v2i32::(ptr as *const i8, transmute(a)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { + vst3_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v4i32::(ptr as *const i8, transmute(a)) +#[cfg(not(target_arch = "arm"))] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v1i64::(ptr as *const i8, transmute(a)) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn 
vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v2i64::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { + vst3_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v8i8::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple 
single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v16i8::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v4i16::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v8i16::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u16(a: 
*mut u16, b: uint16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v1i64::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) { + vst3_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v2i64::(ptr as *const i8, transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) { + vst3q_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")] - fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t); - } - _vst1_f32_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")] - fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t); - } - _vst1q_f32_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "Store 
multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v2f32.p0" - )] - fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f16")] + fn _vst4_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + size: i32, + ); } - _vst1_f32_x2(b.0, b.1, a) + _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] 
+#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4f32.p0" - )] - fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8f16")] + fn _vst4q_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + size: i32, + ); } - _vst1q_f32_x2(b.0, b.1, a) + _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2f32.p0" + link_name = "llvm.aarch64.neon.st4.v4f16.p0" )] - fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32); + fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8); } - _vst1_f32_x3(b.0, b.1, b.2, a) + _vst4_f16(b.0, b.1, b.2, b.3, a as _) } -#[doc = "Store 
multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4f32.p0" + link_name = "llvm.aarch64.neon.st4.v8f16.p0" )] - fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32); + fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8); } - _vst1q_f32_x3(b.0, b.1, b.2, a) + _vst4q_f16(b.0, b.1, b.2, b.3, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, 
assert_instr(vst1))] -pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2f32.p0")] - fn _vst1_f32_x4( - ptr: *mut f32, + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2f32")] + fn _vst4_f32( + ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, + size: i32, ); } - _vst1_f32_x4(a, b.0, b.1, b.2, b.3) + _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f32.p0")] - fn _vst1q_f32_x4( - ptr: *mut f32, + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f32")] + fn _vst4q_f32( + ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, + size: i32, ); } - _vst1q_f32_x4(a, b.0, b.1, b.2, b.3) + _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2f32.p0" - )] - fn _vst1_f32_x4( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - ptr: *mut f32, + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i8")] + fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32); + } + _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v16i8")] + fn _vst4q_s8( + ptr: *mut i8, + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + d: int8x16_t, + size: i32, ); } - 
_vst1_f32_x4(b.0, b.1, b.2, b.3, a) + _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4f32.p0" - )] - fn _vst1q_f32_x4( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - ptr: *mut f32, + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i16")] + fn _vst4_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + size: i32, ); } - _vst1q_f32_x4(b.0, b.1, b.2, b.3, a) + _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] 
-#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_lane_f16(a: *mut f16, b: float16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i16")] + fn _vst4q_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + size: i32, + ); + } + _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] 
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_lane_f16(a: *mut f16, b: float16x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2i32")] + fn _vst4_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + size: i32, + ); + } + _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i32")] + fn _vst4q_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + size: i32, + ); + } + _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"] +#[doc = "Store multiple 
4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = 
"neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] 
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { + crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
- assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { + 
crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f16")] + fn _vst4_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ); + } + _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE 
= 0) -)] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8f16")] + fn _vst4q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4f16.p0" + )] + fn _vst4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8f16.p0" + )] + fn _vst4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); + 
unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2f32")] + fn _vst4_lane_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ); + } + _vst4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] 
-#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f32")] + fn _vst4q_lane_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - 
target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i8")] + fn _vst4_lane_s8( + ptr: *mut i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ); + } + _vst4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s16(a: 
*mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i16")] + fn _vst4_lane_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ); + } + _vst4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i16")] + fn _vst4q_lane_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] 
+#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2i32")] + fn _vst4_lane_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i32, + size: i32, + ); + } + _vst4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i32")] + fn _vst4q_lane_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn 
vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v2f32.p0" + )] + fn _vst4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4f32.p0" + )] + fn _vst4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8i8.p0" + )] + fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); + } + _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4i16.p0" + )] + fn _vst4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8i16.p0" + )] + fn _vst4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s16(b.0, b.1, 
b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v2i32.p0" + )] + fn _vst4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4i32.p0" + )] + fn _vst4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] #[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), @@ -62771,21 +64090,21 @@ pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st4, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -62796,21 +64115,21 @@ pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst4_lane_u16(a: *mut u16, b: uint16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st4, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -62821,21 +64140,21 @@ pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st4, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -62846,21 +64165,21 @@ pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst4_lane_u32(a: *mut u32, b: uint32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st4, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -62871,22 +64190,23 @@ pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4q_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -62895,21 +64215,23 @@ pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { - vst1_s64_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { + 
static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -62918,21 +64240,23 @@ pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { - vst1_s64_x3(transmute(a), transmute(b)) +pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon 
intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -62941,20 +64265,21 @@ pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { - vst1_s64_x4(transmute(a), transmute(b)) +pub unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -62964,20 +64289,44 @@ pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { target_arch = "arm", unstable(feature 
= "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { - vst1q_s64_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { + vst4_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -62987,20 +64336,20 @@ pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { - vst1q_s64_x3(transmute(a), transmute(b)) +pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) { + vst4_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -63010,860 +64359,700 @@ pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { - vst1q_s64_x4(transmute(a), transmute(b)) +pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) { + vst4_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v8i8.p0" - )] - fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8); - } - _vst1_s8_x2(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) { + vst4q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s8_x2(a: 
*mut i8, b: int8x16x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v16i8.p0" - )] - fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8); - } - _vst1q_s8_x2(b.0, b.1, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4i16.p0" - )] - fn _vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16); - } - _vst1_s16_x2(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) { + vst4_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"] #[doc = "## Safety"] #[doc = " * 
Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v8i16.p0" - )] - fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, ptr: *mut i16); - } - _vst1q_s16_x2(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) { + vst4q_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v2i32.p0" - )] - fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32); - } - _vst1_s32_x2(b.0, b.1, a) 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) { + vst4_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4i32.p0" - )] - fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32); - } - _vst1q_s32_x2(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) { + 
vst4q_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v1i64.p0" - )] - fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64); - } - _vst1_s64_x2(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) { + vst4_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
-#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v2i64.p0" - )] - fn _vst1q_s64_x2(a: int64x2_t, b: int64x2_t, ptr: *mut i64); - } - _vst1q_s64_x2(b.0, b.1, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")] - fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t); - } - _vst1_s8_x2(a, b.0, b.1) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) { + vst4q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")] - fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t); - } - _vst1q_s8_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) { + vst4_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")] - fn 
_vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t); - } - _vst1_s16_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { + vst4q_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "Store SIMD&FP register (immediate offset)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")] - fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t); - } - _vst1q_s16_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vstrq_p128(a: *mut p128, b: p128) { + *a = b } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")] - fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t); - } - _vst1_s32_x2(a, b.0, b.1) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"] 
#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")] - fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t); - } - _vst1q_s32_x2(a, b.0, b.1) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")] - fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t); - } - _vst1_s64_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")] - fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t); - } - _vst1q_s64_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple 
single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v8i8.p0" - )] - fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); - } - _vst1_s8_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn 
vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v16i8.p0" - )] - fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); - } - _vst1q_s8_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4i16.p0" - )] - fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); - } - _vst1_s16_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v8i16.p0" - )] - fn _vst1q_s16_x3(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16); - } - _vst1q_s16_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * 
Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2i32.p0" - )] - fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); - } - _vst1_s32_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4i32.p0" - )] - fn 
_vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); - } - _vst1q_s32_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v1i64.p0" - )] - fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); - } - _vst1_s64_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2i64.p0" - )] - fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); - } - _vst1q_s64_x3(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"] #[inline] -#[target_feature(enable = 
"neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i8.p0")] - fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); - } - _vst1_s8_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v16i8.p0")] - fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); - } - _vst1q_s8_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i16.p0")] - fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); - } - _vst1_s16_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, 
three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i16.p0")] - fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t); - } - _vst1q_s16_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn 
vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i32.p0")] - fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); - } - _vst1_s32_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i32.p0")] - fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); - } - _vst1q_s32_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v1i64.p0")] - fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); - } - _vst1_s64_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon 
intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i64.p0")] - fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); - } - _vst1q_s64_x3(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = 
"llvm.aarch64.neon.st1x4.v8i8.p0" - )] - fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); - } - _vst1_s8_x4(b.0, b.1, b.2, b.3, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let d = vsubhn_s16(b, c); + vcombine_s8(a, d) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v16i8.p0" - )] - fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); - } - _vst1q_s8_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, 
assert_instr(st1))] -pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4i16.p0" - )] - fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16); - } - _vst1_s16_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v8i16.p0" - )] - fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); - } - _vst1q_s16_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2i32.p0" - )] - fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); - } - _vst1_s32_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element 
structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4i32.p0" - )] - fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); - } - _vst1q_s32_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v1i64.p0" - )] - fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); - } - _vst1_s64_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub 
unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2i64.p0" - )] - fn _vst1q_s64_x4(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64); - } - _vst1q_s64_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i8.p0")] - fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t); - } - _vst1_s8_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v16i8.p0")] - fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t); - } - _vst1q_s8_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i16.p0")] - fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t); - } - _vst1_s16_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i16.p0")] - fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t); - } - _vst1q_s16_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i32.p0")] - fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t); - } - _vst1_s32_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i32.p0")] - fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t); - } - _vst1q_s32_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v1i64.p0")] - fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t); - } - _vst1_s64_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] 
-#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i64.p0")] - fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t); - } - _vst1q_s64_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -63873,20 +65062,23 @@ pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { - vst1_s8_x2(transmute(a), transmute(b)) +pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let d = vsubhn_s32(b, c); + vcombine_s16(a, d) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -63896,20 +65088,23 @@ pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { - vst1_s8_x3(transmute(a), transmute(b)) +pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let d = vsubhn_s64(b, c); + vcombine_s32(a, d) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + 
all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -63919,20 +65114,23 @@ pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { - vst1_s8_x4(transmute(a), transmute(b)) +pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + let d = vsubhn_u16(b, c); + vcombine_u8(a, d) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -63942,20 +65140,23 @@ pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { - vst1q_s8_x2(transmute(a), transmute(b)) +pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + let d = vsubhn_u32(b, c); + vcombine_u16(a, d) } -#[doc = "Store multiple single-element structures to one, two, three, or four 
registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -63965,20 +65166,19 @@ pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { - vst1q_s8_x3(transmute(a), transmute(b)) +pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + let d = vsubhn_u64(b, c); + vcombine_u32(a, d) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(st1) + assert_instr(subhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -63988,20 +65188,19 @@ pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { - vst1q_s8_x4(transmute(a), transmute(b)) +pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(subhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -64011,20 +65210,19 @@ pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { - vst1_s16_x2(transmute(a), transmute(b)) +pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + let c: i32x4 = i32x4::new(16, 16, 16, 16); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(subhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -64034,20 +65232,19 @@ pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { - vst1_s16_x3(transmute(a), transmute(b)) +pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + let c: i64x2 = i64x2::new(32, 32); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(subhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -64057,20 
+65254,19 @@ pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { - vst1_s16_x4(transmute(a), transmute(b)) +pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(subhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -64080,20 +65276,19 @@ pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { - vst1q_s16_x2(transmute(a), transmute(b)) +pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let c: u32x4 = u32x4::new(16, 16, 16, 16); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = 
"Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(subhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -64103,20 +65298,19 @@ pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { - vst1q_s16_x3(transmute(a), transmute(b)) +pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + let c: u64x2 = u64x2::new(32, 32); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(ssubl) )] #[cfg_attr( not(target_arch = "arm"), @@ -64126,20 +65320,22 @@ pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe 
fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { - vst1q_s16_x4(transmute(a), transmute(b)) +pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + unsafe { + let c: int16x8_t = simd_cast(a); + let d: int16x8_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(ssubl) )] #[cfg_attr( not(target_arch = "arm"), @@ -64149,20 +65345,22 @@ pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { - vst1_s32_x2(transmute(a), transmute(b)) +pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe { + let c: int32x4_t = simd_cast(a); + let d: int32x4_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(ssubl) )] #[cfg_attr( not(target_arch = "arm"), @@ -64172,20 +65370,22 @@ pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { - vst1_s32_x3(transmute(a), transmute(b)) +pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe { + let c: int64x2_t = simd_cast(a); + let d: int64x2_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(usubl) )] #[cfg_attr( not(target_arch = "arm"), @@ -64195,43 +65395,22 @@ pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { - vst1_s32_x4(transmute(a), transmute(b)) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { - vst1q_s32_x2(transmute(a), transmute(b)) +pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { + let c: uint16x8_t = simd_cast(a); + let d: uint16x8_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(usubl) )] #[cfg_attr( not(target_arch = "arm"), @@ -64241,20 +65420,22 @@ pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { - 
vst1q_s32_x3(transmute(a), transmute(b)) +pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { + let c: uint32x4_t = simd_cast(a); + let d: uint32x4_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(usubl) )] #[cfg_attr( not(target_arch = "arm"), @@ -64264,20 +65445,22 @@ pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { - vst1q_s32_x4(transmute(a), transmute(b)) +pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { + let c: uint64x2_t = simd_cast(a); + let d: uint64x2_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(ssubw) )] #[cfg_attr( not(target_arch = "arm"), @@ -64287,20 +65470,18 @@ pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { - vst1_s64_x2(transmute(a), transmute(b)) +pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(ssubw) )] #[cfg_attr( not(target_arch = "arm"), @@ -64310,20 +65491,18 @@ pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { - vst1_s64_x3(transmute(a), transmute(b)) +pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(ssubw) )] #[cfg_attr( not(target_arch = "arm"), @@ -64333,20 +65512,18 @@ pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { - vst1_s64_x4(transmute(a), transmute(b)) +pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(usubw) )] #[cfg_attr( not(target_arch = "arm"), @@ -64356,20 +65533,18 @@ pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { - vst1q_s64_x2(transmute(a), transmute(b)) +pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(usubw) )] #[cfg_attr( not(target_arch = "arm"), @@ -64379,20 +65554,18 @@ pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { - vst1q_s64_x3(transmute(a), transmute(b)) +pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(usubw) )] #[cfg_attr( not(target_arch = "arm"), @@ -64402,4608 +65575,750 @@ pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { - vst1q_s64_x4(transmute(a), transmute(b)) +pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 0) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { - vst1_s8_x2(transmute(a), transmute(b)) +pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_u32_u8(c); + let c = vdup_lane_u32::(c); + vusdot_s32(a, vreinterpret_u8_u32(c), b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 0) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { - vst1_s8_x3(transmute(a), transmute(b)) +pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_u32_u8(c); + let c = 
vdupq_lane_u32::(c); + vusdotq_s32(a, vreinterpretq_u8_u32(c), b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 3) )] -pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { - vst1_s8_x4(transmute(a), transmute(b)) +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vsudot_laneq_s32(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_u32_u8(c); + let c = vdup_laneq_u32::(c); + vusdot_s32(a, vreinterpret_u8_u32(c), b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon 
intrinsic unsafe"] +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 3) )] -pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { - vst1q_s8_x2(transmute(a), transmute(b)) +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vsudotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_u32_u8(c); + let c = vdupq_laneq_u32::(c); + vusdotq_s32(a, vreinterpretq_u8_u32(c), b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { - vst1q_s8_x3(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")] + fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vtbl1(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { - vst1q_s8_x4(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_s8(a: int8x8_t, b: 
int8x8_t) -> int8x8_t { + vtbl1(a, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { - vst1_s16_x2(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbl1(transmute(a), transmute(b))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { - vst1_s16_x3(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbl1(transmute(a), transmute(b))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { - vst1_s16_x4(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")] + fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vtbl2(a, b, c) } } -#[doc = "Store multiple single-element 
structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { - vst1q_s16_x2(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vtbl2(a.0, a.1, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] 
-#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { - vst1q_s16_x3(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { - vst1q_s16_x4(transmute(a), transmute(b)) +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t) { +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")] - fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")] + fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; } - _vst1_v1i64(addr, val, ALIGN) + unsafe { _vtbl3(a, b, c, d) } } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")] - fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32); - } - _vst1_v2f32(addr, val, ALIGN) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + vtbl3(a.0, a.1, a.2, b) } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")] - fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) } - _vst1_v2i32(addr, val, ALIGN) } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")] - fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) } - _vst1_v4i16(addr, val, ALIGN) } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn 
vst1_v8i8(addr: *const i8, val: int8x8_t) { +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")] - fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")] + fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; } - _vst1_v8i8(addr, val, ALIGN) + unsafe { _vtbl4(a, b, c, d, e) } } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")] - fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); - } - _vst1q_v16i8(addr, val, ALIGN) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + vtbl4(a.0, a.1, a.2, a.3, b) } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t) { - unsafe extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")] - fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) } - _vst1q_v2i64(addr, val, ALIGN) } +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")] - fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) } - _vst1q_v4f32(addr, val, ALIGN) } +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t) { +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx1(a: int8x8_t, b: 
int8x8_t, c: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")] - fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")] + fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; } - _vst1q_v4i32(addr, val, ALIGN) + unsafe { _vtbx1(a, b, c) } } +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")] - fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); - } - _vst1q_v8i16(addr, val, ALIGN) +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + vtbx1(a, b, c) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] #[inline] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
-#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")] - fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32); - } - _vst1_v4f16(addr, val, align) +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] #[inline] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")] - fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32); - } - _vst1q_v8f16(addr, val, align) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
-#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4f16.p0" - )] - fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8); + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")] + fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; } - _vst2_f16(b.0, b.1, a as _) + unsafe { _vtbx2(a, b, c, d) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v8f16.p0" - )] - fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8); - } - _vst2q_f16(b.0, b.1, a as _) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + vtbx2(a, b.0, b.1, c) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v4f16")] - fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32); +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) } - _vst2_f16(a as _, b.0, b.1, 2) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v8f16")] - fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32); +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { 
+ unsafe { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) } - _vst2q_f16(a as _, b.0, b.1, 2) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")] - fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")] + fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; } - _vst2_f32(a as _, b.0, b.1, 4) + unsafe { _vtbx3(a, b, c, d, e) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4f32.p0")] - fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32); - } - _vst2q_f32(a as _, b.0, b.1, 4) +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + vtbx3(a, b.0, b.1, b.2, c) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")] - fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) } - _vst2_s8(a as _, b.0, b.1, 1) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")] - fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) } - _vst2q_s8(a as _, b.0, b.1, 1) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, f: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")] - fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32); - } - 
_vst2_s16(a as _, b.0, b.1, 2) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")] - fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32); - } - _vst2q_s16(a as _, b.0, b.1, 2) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2i32.p0")] - fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")] + fn _vtbx4( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + e: int8x8_t, + f: int8x8_t, + ) -> int8x8_t; } - _vst2_s32(a as _, b.0, b.1, 4) + unsafe { _vtbx4(a, b, c, d, e, f) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] #[inline] -#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")] - fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32); - } - _vst2q_s32(a as _, b.0, b.1, 4) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4f16.p0" - )] - fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8); - } - _vst2_lane_f16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v8f16.p0" - )] - fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v4f16")] - fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32); - } - _vst2_lane_f16(a as _, b.0, b.1, LANE, 2) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch 
= "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")] - fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32); - } - _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2f32.p0" - )] - fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8); - } - _vst2_lane_f32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch 
= "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4f32.p0" - )] - fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v8i8.p0" - )] - fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8); - } - _vst2_lane_s8(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4i16.p0" - )] - fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8); - } - _vst2_lane_s16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v8i16.p0" - )] - fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2i32.p0" - )] - fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8); - } - _vst2_lane_s32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, 
assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4i32.p0" - )] - fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")] - fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + unsafe { + vtbx4( + a, + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + ) } - _vst2_lane_f32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] #[inline] -#[cfg(target_arch = "arm")] +#[cfg(target_endian = "big")] #[target_feature(enable = 
"neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")] - fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32); - } - _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")] - fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + let mut b: int8x8x4_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); + b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); + b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); + b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vtbx4( + a, + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - 
_vst2_lane_s8(a as _, b.0, b.1, LANE, 1) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")] - fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32); - } - _vst2_lane_s16(a as _, b.0, b.1, LANE, 2) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")] - fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + 
transmute(b.3), + transmute(c), + )) } - _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")] - fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32); - } - _vst2_lane_s32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")] - fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx4( + transmute(a), + 
transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) } - _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_u8(a: *mut u8, b: uint8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_u16(a: *mut u16, b: uint16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2_lane_s16::(transmute(a), 
transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_lane_u16(a: *mut u16, b: uint16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_u32(a: *mut u32, b: uint32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - 
vst2_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_lane_u32(a: *mut u32, b: uint32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2q_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_p8(a: *mut p8, b: poly8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_p16(a: *mut p16, b: poly16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_lane_p16(a: *mut p16, b: poly16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"] -#[doc = "## 
Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { - vst2_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) { - vst2_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) { - vst2_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) { - vst2q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) { - vst2_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) { - vst2q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) { - vst2_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) { - vst2q_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] 
-#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) { - vst2_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) { - vst2q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_p16(a: 
*mut p16, b: poly16x4x2_t) { - vst2_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { - vst2q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f16")] - fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32); - } - _vst3_f16(a as _, b.0, b.1, b.2, 2) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] -#[doc = "## Safety"] -#[doc 
= " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8f16")] - fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32); - } - _vst3q_f16(a as _, b.0, b.1, b.2, 2) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v4f16.p0" - )] - fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8); - } - _vst3_f16(b.0, b.1, b.2, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] 
-#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v8f16.p0" - )] - fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8); - } - _vst3q_f16(b.0, b.1, b.2, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_s8(a: *mut i8, b: 
int8x8x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] 
-#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three 
registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn 
vst3q_s16(a: *mut i16, b: int16x8x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vst3lane.p0.v4f16")] - fn _vst3_lane_f16( - ptr: *mut i8, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - n: i32, - size: i32, - ); - } - _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8f16")] - fn _vst3q_lane_f16( - ptr: *mut i8, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", 
target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4f16.p0" - )] - fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8); - } - _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v8f16.p0" - )] - fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2f32")] - fn _vst3_lane_f32( - ptr: *mut i8, - a: float32x2_t, - b: float32x2_t, - c: 
float32x2_t, - n: i32, - size: i32, - ); - } - _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f32")] - fn _vst3q_lane_f32( - ptr: *mut i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i8")] - fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32); - } - _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] -#[doc = "## 
Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i16")] - fn _vst3_lane_s16( - ptr: *mut i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - n: i32, - size: i32, - ); - } - _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i16")] - fn _vst3q_lane_s16( - ptr: *mut i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800")] -pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2i32")] - fn _vst3_lane_s32( - ptr: *mut i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - n: i32, - size: i32, - ); - } - _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i32")] - fn _vst3q_lane_s32( - ptr: *mut i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2f32.p0" - )] - fn _vst3_lane_f32(a: 
float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8); - } - _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4f32.p0" - )] - fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v8i8.p0" - )] - fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); - } - _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4i16.p0" - )] - fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8); - } - _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v8i16.p0" - )] - fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] 
-#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2i32.p0" - )] - fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8); - } - _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4i32.p0" - )] - fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] 
-#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_u8(a: *mut u8, b: uint8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_u16(a: *mut u16, b: uint16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - vst3_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - 
target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_lane_u16(a: *mut u16, b: uint16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_u32(a: *mut u32, b: uint32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn 
vst3q_lane_u32(a: *mut u32, b: uint32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - vst3q_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_p8(a: *mut p8, b: poly8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_p16(a: *mut p16, b: poly16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - vst3_lane_s16::(transmute(a), 
transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_lane_p16(a: *mut p16, b: poly16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { - vst3_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] -#[doc = "## Safety"] 
-#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { - vst3_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) { - vst3_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) { - vst3q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) { - vst3_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) { - vst3q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) { - vst3_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) { - vst3q_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) { - vst3_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] 
-#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) { - vst3q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) { - vst3_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn 
vst3q_p16(a: *mut p16, b: poly16x8x3_t) { - vst3q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f16")] - fn _vst4_f16( - ptr: *mut i8, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - size: i32, - ); - } - _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8f16")] - fn _vst4q_f16( - ptr: *mut i8, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - size: i32, - ); - } - _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four 
registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4f16.p0" - )] - fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8); - } - _vst4_f16(b.0, b.1, b.2, b.3, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v8f16.p0" - )] - fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8); - } - _vst4q_f16(b.0, b.1, b.2, b.3, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] 
-#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2f32")] - fn _vst4_f32( - ptr: *mut i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - size: i32, - ); - } - _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f32")] - fn _vst4q_f32( - ptr: *mut i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - size: i32, - ); - } - _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i8")] - fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32); - } - _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1) -} -#[doc = 
"Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v16i8")] - fn _vst4q_s8( - ptr: *mut i8, - a: int8x16_t, - b: int8x16_t, - c: int8x16_t, - d: int8x16_t, - size: i32, - ); - } - _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i16")] - fn _vst4_s16( - ptr: *mut i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - size: i32, - ); - } - _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { - unsafe 
extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i16")] - fn _vst4q_s16( - ptr: *mut i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - size: i32, - ); - } - _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2i32")] - fn _vst4_s32( - ptr: *mut i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - size: i32, - ); - } - _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i32")] - fn _vst4q_s32( - ptr: *mut i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - size: i32, - ); - } - _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] -#[doc = "## Safety"] -#[doc = " * 
Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four 
registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn 
vst4q_s32(a: *mut i32, b: int32x4x4_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f16")] - fn _vst4_lane_f16( - ptr: *mut i8, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i32, - size: i32, - ); - } - _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vst4lane.p0.v8f16")] - fn _vst4q_lane_f16( - ptr: *mut i8, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i32, - size: i32, - ); - } - _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4f16.p0" - )] - fn _vst4_lane_f16( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = 
"aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v8f16.p0" - )] - fn _vst4q_lane_f16( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2f32")] - fn _vst4_lane_f32( - ptr: *mut i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i32, - size: i32, - ); - } - _vst4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f32")] - fn _vst4q_lane_f32( - ptr: *mut i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i32, - size: i32, 
- ); - } - _vst4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i8")] - fn _vst4_lane_s8( - ptr: *mut i8, - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - n: i32, - size: i32, - ); - } - _vst4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i16")] - fn _vst4_lane_s16( - ptr: *mut i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i32, - size: i32, - ); - } - _vst4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] 
-#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i16")] - fn _vst4q_lane_s16( - ptr: *mut i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i32, - size: i32, - ); - } - _vst4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2i32")] - fn _vst4_lane_s32( - ptr: *mut i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i32, - size: i32, - ); - } - _vst4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] 
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i32")] - fn _vst4q_lane_s32( - ptr: *mut i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i32, - size: i32, - ); - } - _vst4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2f32.p0" - )] - fn _vst4_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( 
- any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4f32.p0" - )] - fn _vst4q_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v8i8.p0" - )] - fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); - } - _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4i16.p0" - )] - fn _vst4_lane_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i64, - 
ptr: *mut i8, - ); - } - _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v8i16.p0" - )] - fn _vst4q_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2i32.p0" - )] - fn _vst4_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4i32.p0" - )] - fn _vst4q_lane_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_u16(a: *mut u16, b: uint16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_u32(a: *mut u32, b: uint32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4q_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - 
not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { - vst4_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] 
-#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) { - vst4_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) { - vst4_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) { - vst4q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) { - vst4_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] 
-#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) { - vst4q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) { - vst4_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn 
vst4q_u32(a: *mut u32, b: uint32x4x4_t) { - vst4q_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) { - vst4_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) { - vst4q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] 
-#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) { - vst4_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { - vst4q_s16(transmute(a), transmute(b)) -} -#[doc = "Store SIMD&FP register (immediate offset)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", 
since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vstrq_p128(a: *mut p128, b: p128) { - *a = b -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"] -#[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vsub.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_sub(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + unsafe { + let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_sub(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: float16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_sub(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + unsafe { + let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - 
stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_sub(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: float16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(zip1) )] -pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69013,43 +66328,27 @@ pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_sub(a, b) } +pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + unsafe { + let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(zip1) )] -pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(subhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69059,49 +66358,32 @@ pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - let d = vsubhn_s16(b, c); - vcombine_s8(a, d) +pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: float32x2_t = simd_shuffle!(a, b, [1, 
3]); + let mut ret_val: float32x2x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(subhn2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip1) )] -pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let d = vsubhn_s32(b, c); - vcombine_s16(a, d) -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(subhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69111,49 +66393,27 @@ pub fn 
vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let d = vsubhn_s64(b, c); - vcombine_s32(a, d) +pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + unsafe { + let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(subhn2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip1) )] -pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - let d = vsubhn_u16(b, c); - vcombine_u8(a, d) -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] 
#[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(subhn2) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69163,45 +66423,32 @@ pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - let d = vsubhn_u32(b, c); - vcombine_u16(a, d) +pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: int32x2x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(subhn2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip1) )] -pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - let d = vsubhn_u64(b, c); - vcombine_u32(a, d) -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69211,41 +66458,27 @@ pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { - let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + unsafe { + let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) -)] -#[cfg_attr( - 
not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(zip1) )] -pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { - let c: i32x4 = i32x4::new(16, 16, 16, 16); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69255,41 +66488,32 @@ pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { - let c: i64x2 = i64x2::new(32, 32); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: uint32x2x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] 
#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69299,19 +66523,27 @@ pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - let c: u32x4 = u32x4::new(16, 16, 16, 16); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + unsafe { + let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69321,19 +66553,32 @@ pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - let c: u64x2 = u64x2::new(32, 32); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: float32x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val + } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable 
= "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69343,22 +66588,27 @@ pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { +pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let c: int16x8_t = simd_cast(a); - let d: int16x8_t = simd_cast(b); - simd_sub(c, d) + let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubl) + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69368,22 +66618,32 @@ pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { +pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let c: int32x4_t = simd_cast(a); - let d: int32x4_t = simd_cast(b); - simd_sub(c, d) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: int8x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69393,22 +66653,35 @@ pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { +pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let c: int64x2_t = simd_cast(a); - let d: int64x2_t = simd_cast(b); - simd_sub(c, d) + let 
a1: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: int8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69418,22 +66691,50 @@ pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { +pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let c: uint16x8_t = simd_cast(a); - let d: uint16x8_t = simd_cast(b); - simd_sub(c, d) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a1: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: int8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + let mut ret_val: int8x16x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!( + ret_val.0, 
+ ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69443,22 +66744,27 @@ pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { +pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let c: uint32x4_t = simd_cast(a); - let d: uint32x4_t = simd_cast(b); - simd_sub(c, d) + let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69468,22 +66774,32 @@ pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { +pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let c: uint64x2_t = simd_cast(a); - let d: uint64x2_t = simd_cast(b); - simd_sub(c, d) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: int16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69493,18 +66809,27 @@ pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + unsafe { + let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69514,18 +66839,32 @@ pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 
12, 6, 14]); + let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: int16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69535,18 +66874,27 @@ pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + unsafe { + let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69556,18 +66904,32 @@ pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: int32x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val + } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69577,18 +66939,27 @@ pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + unsafe { + let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -69598,1026 +66969,1184 @@ pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 
12, 6, 14]); + let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: uint8x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } } -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sudot, LANE = 0) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c = vreinterpret_u32_u8(c); - let c = vdup_lane_u32::(c); - vusdot_s32(a, vreinterpret_u8_u32(c), b) +pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + unsafe { + let a1: uint8x16_t = 
simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: uint8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) + } } -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sudot, LANE = 0) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c = vreinterpret_u32_u8(c); - let c = vdupq_lane_u32::(c); - vusdotq_s32(a, vreinterpretq_u8_u32(c), b) -} -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] -#[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sudot, LANE = 3) -)] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vsudot_laneq_s32(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let c = vreinterpretq_u32_u8(c); - let c = vdup_laneq_u32::(c); - vusdot_s32(a, vreinterpret_u8_u32(c), b) -} -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] -#[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(sudot, LANE = 3) -)] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vsudotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let c = vreinterpretq_u32_u8(c); - let c = vdupq_laneq_u32::(c); - vusdotq_s32(a, vreinterpretq_u8_u32(c), b) -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
-#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")] - fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vtbl1(a, b) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vtbl1(a, b) -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vtbl1(transmute(a), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl1(transmute(a), transmute(b))); - 
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vtbl1(transmute(a), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl1(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")] - fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - } - 
unsafe { _vtbl2(a, b, c) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { - vtbl2(a.0, a.1, b) -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - let mut a: uint8x8x2_t = a; +pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let 
a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a1: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: uint8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + let mut ret_val: uint8x16x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - let mut a: poly8x8x2_t = a; +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")] - fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: uint16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } - unsafe { _vtbl3(a, b, c, d) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { - vtbl3(a.0, a.1, a.2, b) } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) + let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { - let mut a: uint8x8x3_t = a; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe 
{ - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: uint16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) + let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { - let mut a: poly8x8x3_t = a; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 
1, 0]) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: uint32x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")] - fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + unsafe { + let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, 
b1)) } - unsafe { _vtbl4(a, b, c, d, e) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { - vtbl4(a.0, a.1, a.2, a.3, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: poly8x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) + let a1: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { - let 
mut a: uint8x8x4_t = a; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a1: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + let mut ret_val: poly8x16x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[doc = "Transpose elements"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) + let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { - let mut a: poly8x8x4_t = a; - unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = 
simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")] - fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - } - unsafe { _vtbx1(a, b, c) } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - vtbx1(a, b, c) -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } -} -#[doc = 
"Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: poly16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: poly16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { - 
unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")] - fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe { + let c: int8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } - unsafe { _vtbx2(a, b, c, d) } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { - vtbx2(a, b.0, b.1, c) } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { unsafe { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) + let c: int8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - let mut b: uint8x8x2_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = 
simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: int16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { unsafe { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) + let c: int16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[doc 
= "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { - let mut b: poly8x8x2_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: int32x2_t = simd_and(a, b); + let d: i32x2 = i32x2::new(0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")] - fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe { + let c: int32x4_t = simd_and(a, b); + let d: i32x4 = i32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } - unsafe { _vtbx3(a, b, c, d, e) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { - vtbx3(a, b.0, b.1, b.2, c) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + unsafe { + let c: poly8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { unsafe { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) + let c: poly8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { - let mut b: uint8x8x3_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: poly16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] 
-#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { unsafe { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) + let c: poly16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { - let mut b: poly8x8x3_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: uint8x8_t = simd_and(a, b); + let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, f: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")] - fn _vtbx4( - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - e: int8x8_t, - f: int8x8_t, - ) -> int8x8_t; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let c: uint8x16_t = simd_and(a, b); + let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } - unsafe { _vtbx4(a, b, c, d, e, f) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe { - vtbx4( - a, - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - c, - ) + let c: uint16x4_t = simd_and(a, b); + let d: u16x4 = u16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { - let mut b: int8x8x4_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x8_t = vtbx4( - a, - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - c, - ); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: uint16x8_t = simd_and(a, b); + let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"] #[inline] -#[cfg(target_endian = 
"little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) + let c: uint32x2_t = simd_and(a, b); + let d: u32x2 = u32x2::new(0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { - let mut b: uint8x8x4_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch 
= "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: uint32x4_t = simd_and(a, b); + let d: u32x4 = u32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - unsafe { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) - } +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_s32_s8(c); + let c = vdup_lane_s32::(c); + vusdot_s32(a, b, vreinterpret_s8_s32(c)) } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - let mut b: poly8x8x4_t = b; - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_s32_s8(c); + let c = vdupq_lane_s32::(c); + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(usdot, LANE = 3) )] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_s32_s8(c); + let c = vdup_laneq_s32::(c); + vusdot_s32(a, b, vreinterpret_s8_s32(c)) +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(usdot, LANE = 3) +)] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_s32_s8(c); + let c = vdupq_laneq_s32::(c); + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) +} +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(usdot) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { - unsafe { - let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) +pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")] + fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> 
int32x2_t; } + unsafe { _vusdot_s32(a, b, c) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(usdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")] + fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vusdotq_s32(a, b, c) } +} +#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usmmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")] + fn _vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vusmmlaq_s32(a, b, c) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[target_feature(enable = "neon,fp16")] #[cfg_attr( @@ -70629,113 +68158,130 @@ pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { +pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] +#[doc = "Unzip vectors"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip2) + assert_instr(uzp2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]); - let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a1, b1)) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: float16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] #[inline] -#[target_feature(enable = 
"neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip2) + assert_instr(uzp2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]); - let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a1, b1)) + let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), - assert_instr(zip2) + assert_instr(uzp2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); - let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a1, b1)) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: float16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70745,26 +68291,27 @@ pub fn vtrn_u32(a: 
uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { +pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70774,26 +68321,32 @@ pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { +pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = 
simd_shuffle!(a, b, [1, 3]); + let mut ret_val: float32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70803,34 +68356,27 @@ pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { +pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a1: int8x16_t = simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ); - let b1: int8x16_t = simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ); - transmute((a1, b1)) + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] #[inline] +#[cfg(target_endian = "big")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70840,26 +68386,32 @@ pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { +pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: int32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + 
assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70869,26 +68421,27 @@ pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { +pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70898,26 +68451,32 @@ pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { +pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + 
let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: uint32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70927,26 +68486,27 @@ pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { +pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70956,34 +68516,32 @@ pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { +pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a1: uint8x16_t = simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ); - let b1: uint8x16_t = simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ); - transmute((a1, b1)) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: float32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70993,26 +68551,27 @@ pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { +pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71022,26 +68581,32 @@ pub 
fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { +pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: int8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71051,26 +68616,35 @@ pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_u32(a: 
uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { +pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71080,26 +68654,50 @@ pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { +pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 
6, 5, 4, 3, 2, 1, 0]); + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + let mut ret_val: int8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71109,34 +68707,27 @@ pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { +pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let a1: poly8x16_t = simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ); - let b1: poly8x16_t = simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ); - transmute((a1, b1)) + let a0: int16x4_t = simd_shuffle!(a, b, 
[0, 2, 4, 6]); + let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71146,26 +68737,32 @@ pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { +pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: int16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71175,22 +68772,27 @@ pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { +pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + 
assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71200,22 +68802,32 @@ pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { +pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let c: int8x8_t = simd_and(a, b); - let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: int16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71225,22 +68837,27 @@ pub fn vtst_s8(a: int8x8_t, b: 
int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { +pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let c: int8x16_t = simd_and(a, b); - let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71250,22 +68867,32 @@ pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { +pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let c: int16x4_t = simd_and(a, b); - let d: i16x4 = i16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int32x4_t = simd_shuffle!(a, b, 
[1, 3, 5, 7]); + let mut ret_val: int32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71275,22 +68902,27 @@ pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { +pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let c: int16x8_t = simd_and(a, b); - let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] #[inline] +#[cfg(target_endian = "big")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71300,22 +68932,32 @@ pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { +pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let c: int32x2_t = simd_and(a, b); - let d: i32x2 = i32x2::new(0, 0); - simd_ne(c, transmute(d)) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: uint8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71325,22 +68967,35 @@ pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { +pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let c: int32x4_t = simd_and(a, b); - let d: i32x4 = i32x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71350,22 +69005,50 @@ pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> 
uint8x8_t { +pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let c: poly8x8_t = simd_and(a, b); - let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + let mut ret_val: uint8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71375,22 +69058,27 @@ pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_p8(a: poly8x16_t, b: 
poly8x16_t) -> uint8x16_t { +pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let c: poly8x16_t = simd_and(a, b); - let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71400,22 +69088,32 @@ pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { +pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let c: poly16x4_t = simd_and(a, b); - let d: i16x4 = i16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: uint16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); 
+ ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71425,22 +69123,27 @@ pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { +pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let c: poly16x8_t = simd_and(a, b); - let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71450,22 +69153,32 @@ pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { +pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let c: uint8x8_t = simd_and(a, b); - let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: uint16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71475,22 +69188,27 @@ pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let c: uint8x16_t = simd_and(a, b); - let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71500,22 +69218,32 @@ pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { +pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let c: uint16x4_t = simd_and(a, b); - let d: 
u16x4 = u16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: uint32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71525,22 +69253,27 @@ pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { +pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let c: uint16x8_t = simd_and(a, b); - let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71550,22 +69283,32 @@ pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let c: uint32x2_t = simd_and(a, b); - let d: u32x2 = u32x2::new(0, 0); - simd_ne(c, transmute(d)) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: poly8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] 
#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71575,197 +69318,174 @@ pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { +pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - let c: uint32x4_t = simd_and(a, b); - let d: u32x4 = u32x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), - assert_instr(usdot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(uzp1) )] -pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c = vreinterpret_s32_s8(c); - let c = vdup_lane_s32::(c); - vusdot_s32(a, b, vreinterpret_s8_s32(c)) -} -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 0) + assert_instr(uzp2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c = vreinterpret_s32_s8(c); - let c = vdupq_lane_s32::(c); - vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) +pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + 
let b0: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + let mut ret_val: poly8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val + } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 3) + assert_instr(uzp1) )] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let c = vreinterpretq_s32_s8(c); - let c = vdup_laneq_s32::(c); - vusdot_s32(a, b, vreinterpret_s8_s32(c)) -} -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] #[cfg_attr( 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 3) -)] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let c = vreinterpretq_s32_s8(c); - let c = vdupq_laneq_s32::(c); - vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) -} -#[doc = "Dot product vector form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"] -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] -#[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(usdot) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")] - fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t; +pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + unsafe { + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } - unsafe { _vusdot_s32(a, b, c) } } -#[doc = "Dot product vector form with unsigned and signed integers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( - all( - test, - any(target_arch = "aarch64", target_arch = "arm64ec"), - target_endian = "little" - ), - assert_instr(usdot) + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")] - fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; +pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: poly16x4x2_t = transmute((a0, b0)); + ret_val.0 = 
simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } - unsafe { _vusdotq_s32(a, b, c) } } -#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usmmla) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")] - fn _vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; +pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + 
transmute((a0, b0)) } - unsafe { _vusmmlaq_s32(a, b, c) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( @@ -71776,35 +69496,39 @@ pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp2) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { +pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { unsafe { - let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: poly16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] +#[doc = "Zip 
vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[target_feature(enable = "neon,fp16")] #[cfg_attr( @@ -71816,19 +69540,19 @@ pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { +pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -71837,27 +69561,34 @@ pub fn vuzpq_f16(a: 
float16x8_t, b: float16x8_t) -> float16x8x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: float16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -71866,27 +69597,29 @@ pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(zip2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -71895,34 +69628,42 @@ pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] 
+pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: float16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71932,26 +69673,27 @@ pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { +pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let 
a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71961,26 +69703,32 @@ pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { +pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: float32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] +#[doc = "Zip vectors"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71990,34 +69738,27 @@ pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { +pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a0: int8x16_t = simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ); - let b0: int8x16_t = simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ); + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72027,26 +69768,32 @@ pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { +pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: int32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72056,26 +69803,27 @@ pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") )] -pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { +pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72085,26 +69833,32 @@ pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { +pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: uint32x2x2_t = transmute((a0, b0)); + ret_val.0 = 
simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72114,26 +69868,27 @@ pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { +pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72143,34 +69898,32 @@ pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { +pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a0: uint8x16_t = simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ); - let b0: uint8x16_t = simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ); - transmute((a0, b0)) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: int8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72180,26 +69933,27 @@ pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { +pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72209,26 +69963,32 @@ pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { +pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { 
unsafe { - let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: int16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72238,26 +69998,27 @@ pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { +pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 
14, 7, 15]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72267,26 +70028,32 @@ pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { +pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: uint8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] +#[doc = "Zip 
vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72296,34 +70063,27 @@ pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { +pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let a0: poly8x16_t = simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ); - let b0: poly8x16_t = simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ); + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - 
assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72333,26 +70093,32 @@ pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { +pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: uint16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72362,18 +70128,20 @@ pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> 
poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { +pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72382,28 +70150,33 @@ pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { +pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: poly8x8_t = 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: poly8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72412,29 +70185,28 @@ pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { +pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = 
simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72451,19 +70223,25 @@ pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { +pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: poly16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72480,19 +70258,20 @@ pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { +pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72509,19 +70288,25 @@ pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { +pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: 
float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: float32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72538,19 +70323,28 @@ pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { +pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72567,19 +70361,43 @@ pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { +pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + let mut ret_val: int8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72596,19 
+70414,20 @@ pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { +pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72625,19 +70444,25 @@ pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { +pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: int16x8x2_t = 
transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72654,19 +70479,20 @@ pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { +pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(zip1) @@ -72683,16 +70509,22 @@ pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { +pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: int32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72712,16 +70544,25 @@ pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { +pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 
29, 14, 30, 15, 31] + ); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72741,24 +70582,40 @@ pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { +pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let a0: int8x16_t = simd_shuffle!( + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x16_t = simd_shuffle!( a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] ); - let b0: int8x16_t = simd_shuffle!( + let b0: uint8x16_t = simd_shuffle!( a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] ); - transmute((a0, b0)) + let mut ret_val: uint8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72778,16 +70635,17 @@ pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { +pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72807,16 +70665,22 @@ pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { +pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: uint16x8x2_t = transmute((a0, b0)); + ret_val.0 = 
simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72836,24 +70700,17 @@ pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { +pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let a0: uint8x16_t = simd_shuffle!( - a, - b, - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] - ); - let b0: uint8x16_t = simd_shuffle!( - a, - b, - [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] - ); + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72873,16 +70730,22 @@ pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { 
+pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); - transmute((a0, b0)) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: uint32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72902,16 +70765,25 @@ pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { +pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); transmute((a0, b0)) } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"] #[inline] +#[cfg(target_endian = "big")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72933,6 +70805,10 @@ pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { )] pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); let a0: poly8x16_t = simd_shuffle!( a, b, @@ -72943,12 +70819,24 @@ pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] ); - transmute((a0, b0)) + let mut ret_val: poly8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -72975,3 +70863,38 @@ pub fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { transmute((a0, b0)) } } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip1) +)] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: poly16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } +} diff --git a/crates/intrinsic-test/missing_x86.txt b/crates/intrinsic-test/missing_x86.txt index f88a125bfd..a02f6b68e3 100644 --- a/crates/intrinsic-test/missing_x86.txt +++ b/crates/intrinsic-test/missing_x86.txt @@ -45,15 +45,23 @@ _mm_set1_pch _tpause _umwait -# IMM8 must be an even number in the range `0..=62` -_mm_sm3rnds2_epi32 - # SDE ERROR: Cannot execute XGETBV with ECX != 0 _xgetbv # top bits are undefined, unclear how to test these +_mm256_castph128_ph256 +_mm256_castps128_ps256 +_mm256_castpd128_pd256 _mm256_castsi128_si256 + +_mm512_castph128_ph512 +_mm512_castps128_ps512 +_mm512_castpd128_pd512 _mm512_castsi128_si512 + +_mm512_castph256_ph512 +_mm512_castps256_ps512 +_mm512_castpd256_pd512 _mm512_castsi256_si512 # Clang bug @@ -65,3 +73,15 @@ _mm512_mask_reduce_min_pd _mm512_mask_reduce_min_ps _mm_extract_epi16 _mm_extract_epi8 + +# TODO: fix +_mm_movemask_epi8 +_mm_movemask_pd + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/crates/intrinsic-test/src/arm/compile.rs 
b/crates/intrinsic-test/src/arm/compile.rs deleted file mode 100644 index a672da2cc0..0000000000 --- a/crates/intrinsic-test/src/arm/compile.rs +++ /dev/null @@ -1,51 +0,0 @@ -use crate::common::cli::ProcessedCli; -use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; - -pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { - let cpp_compiler = config.cpp_compiler.as_ref()?; - - // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations - let mut command = CompilationCommandBuilder::new() - .add_arch_flags(["armv8.6-a", "crypto", "crc", "dotprod", "fp16"]) - .set_compiler(cpp_compiler) - .set_target(&config.target) - .set_opt_level("2") - .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) - .set_project_root("c_programs") - .add_extra_flags(["-ffp-contract=off", "-Wno-narrowing"]); - - if !config.target.contains("v7") { - command = command.add_arch_flags(["faminmax", "lut", "sha3", "fp8"]); - } - - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } - - let mut cpp_compiler = command.into_cpp_compilation(); - - if config.target.contains("aarch64_be") { - let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else { - panic!( - "target `{}` must specify `cxx_toolchain_dir`", - config.target - ) - }; - - cpp_compiler.command_mut().args([ - &format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"), - "--include-directory", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"), - "--include-directory", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"), - "-L", - &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"), - "-L", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"), - "-B", - &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"), - ]); - } - - Some(cpp_compiler) -} diff --git 
a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs index 60bb0ca56c..9371db737e 100644 --- a/crates/intrinsic-test/src/arm/config.rs +++ b/crates/intrinsic-test/src/arm/config.rs @@ -3,51 +3,6 @@ pub const NOTICE: &str = "\ // test are derived from a JSON specification, published under the same license as the // `intrinsic-test` crate.\n"; -pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" -#ifdef __aarch64__ -std::ostream& operator<<(std::ostream& os, poly128_t value); -#endif - -std::ostream& operator<<(std::ostream& os, float16_t value); -std::ostream& operator<<(std::ostream& os, uint8_t value); - -// T1 is the `To` type, T2 is the `From` type -template T1 cast(T2 x) { - static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); - T1 ret{}; - memcpy(&ret, &x, sizeof(T1)); - return ret; -} -"#; - -pub const PLATFORM_C_DEFINITIONS: &str = r#" -#ifdef __aarch64__ -std::ostream& operator<<(std::ostream& os, poly128_t value) { - std::stringstream temp; - do { - int n = value % 10; - value /= 10; - temp << n; - } while (value != 0); - std::string tempstr(temp.str()); - std::string res(tempstr.rbegin(), tempstr.rend()); - os << res; - return os; -} - -#endif - -std::ostream& operator<<(std::ostream& os, float16_t value) { - os << static_cast(value); - return os; -} - -std::ostream& operator<<(std::ostream& os, uint8_t value) { - os << (unsigned int) value; - return os; -} -"#; - pub const PLATFORM_RUST_DEFINITIONS: &str = ""; pub const PLATFORM_RUST_CFGS: &str = r#" diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 99c8da854c..9bf6c95ffd 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -1,5 +1,4 @@ mod argument; -mod compile; mod config; mod intrinsic; mod json_parser; @@ -7,7 +6,6 @@ mod types; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; -use 
crate::common::compile_c::CppCompilation; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::ArmIntrinsicType; @@ -15,16 +13,11 @@ use json_parser::get_neon_intrinsics; pub struct ArmArchitectureTest { intrinsics: Vec>, - cli_options: ProcessedCli, } impl SupportedArchitectureTest for ArmArchitectureTest { type IntrinsicImpl = ArmIntrinsicType; - fn cli_options(&self) -> &ProcessedCli { - &self.cli_options - } - fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } @@ -32,18 +25,16 @@ impl SupportedArchitectureTest for ArmArchitectureTest { const NOTICE: &str = config::NOTICE; const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; - const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - fn cpp_compilation(&self) -> Option { - compile::build_cpp_compilation(&self.cli_options) + fn arch_flags(&self) -> Vec<&str> { + vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"] } fn create(cli_options: ProcessedCli) -> Self { - let a32 = cli_options.target.contains("v7"); + let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target) .expect("Error parsing input file"); @@ -68,9 +59,6 @@ impl SupportedArchitectureTest for ArmArchitectureTest { .take(sample_size) .collect::>(); - Self { - intrinsics, - cli_options, - } + Self { intrinsics } } } diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 18468bd558..e9614eba21 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -1,6 +1,4 @@ use super::intrinsic::ArmIntrinsicType; -use crate::common::cli::Language; -use 
crate::common::indentation::Indentation; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; impl IntrinsicTypeDefinition for ArmIntrinsicType { @@ -8,8 +6,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { fn c_type(&self) -> String { let prefix = self.kind.c_prefix(); - if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) { - match (simd_len, vec_len) { + if let Some(bit_len) = self.bit_len { + match (self.simd_len, self.vec_len) { (None, None) => format!("{prefix}{bit_len}_t"), (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"), (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"), @@ -20,19 +18,24 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { } } - fn c_single_vector_type(&self) -> String { - if let (Some(bit_len), Some(simd_len)) = (self.bit_len, self.simd_len) { - format!( - "{prefix}{bit_len}x{simd_len}_t", - prefix = self.kind.c_prefix() - ) + fn rust_type(&self) -> String { + let rust_prefix = self.kind.rust_prefix(); + let c_prefix = self.kind.c_prefix(); + + if let Some(bit_len) = self.bit_len { + match (self.simd_len, self.vec_len) { + (None, None) => format!("{rust_prefix}{bit_len}"), + (Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"), + (Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"), + (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case + } } else { - unreachable!("Shouldn't be called on this type") + todo!("{self:#?}") } } /// Determines the load function for this type. 
- fn get_load_function(&self, language: Language) -> String { + fn get_load_function(&self) -> String { if let IntrinsicType { kind: k, bit_len: Some(bl), @@ -47,16 +50,13 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { "" }; - let choose_workaround = language == Language::C && self.target.contains("v7"); format!( "vld{len}{quad}_{type}{size}", type = match k { TypeKind::Int(Sign::Unsigned) => "u", TypeKind::Int(Sign::Signed) => "s", TypeKind::Float => "f", - // The ACLE doesn't support 64-bit polynomial loads on Armv7 - // if armv7 and bl == 64, use "s", else "p" - TypeKind::Poly => if choose_workaround && *bl == 64 {"s"} else {"p"}, + TypeKind::Poly => "p", x => todo!("get_load_function TypeKind: {x:#?}"), }, size = bl, @@ -67,97 +67,6 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { todo!("get_load_function IntrinsicType: {self:#?}") } } - - /// Determines the get lane function for this type. - fn get_lane_function(&self) -> String { - if let IntrinsicType { - kind: k, - bit_len: Some(bl), - simd_len, - .. - } = &self.data - { - let quad = if (simd_len.unwrap_or(1) * bl) > 64 { - "q" - } else { - "" - }; - format!( - "vget{quad}_lane_{type}{size}", - type = match k { - TypeKind::Int(Sign::Unsigned) => "u", - TypeKind::Int(Sign::Signed) => "s", - TypeKind::Float => "f", - TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {x:#?}"), - }, - size = bl, - quad = quad, - ) - } else { - todo!("get_lane_function IntrinsicType: {self:#?}") - } - } - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. 
- fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { - let lanes = if self.num_vectors() > 1 { - (0..self.num_vectors()) - .map(|vector| { - format!( - r#""{ty}(" << {lanes} << ")""#, - ty = self.c_single_vector_type(), - lanes = (0..self.num_lanes()) - .map(move |idx| -> std::string::String { - let lane_fn = self.get_lane_function(); - let final_cast = self.generate_final_type_cast(); - format!( - "{final_cast}{lane_fn}(__return_value.val[{vector}], {idx})" - ) - }) - .collect::>() - .join(r#" << ", " << "#) - ) - }) - .collect::>() - .join(r#" << ", " << "#) - } else if self.num_lanes() > 1 { - (0..self.num_lanes()) - .map(|idx| -> std::string::String { - let lane_fn = self.get_lane_function(); - let final_cast = self.generate_final_type_cast(); - format!("{final_cast}{lane_fn}(__return_value, {idx})") - }) - .collect::>() - .join(r#" << ", " << "#) - } else { - format!( - "{promote}cast<{cast}>(__return_value)", - cast = match self.kind() { - TypeKind::Float if self.inner_size() == 16 => "float16_t".to_string(), - TypeKind::Float if self.inner_size() == 32 => "float".to_string(), - TypeKind::Float if self.inner_size() == 64 => "double".to_string(), - TypeKind::Int(Sign::Signed) => format!("int{}_t", self.inner_size()), - TypeKind::Int(Sign::Unsigned) => format!("uint{}_t", self.inner_size()), - TypeKind::Poly => format!("poly{}_t", self.inner_size()), - ty => todo!("print_result_c - Unknown type: {ty:#?}"), - }, - promote = self.generate_final_type_cast(), - ) - }; - - format!( - r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, - ty = if self.is_simd() { - format!("{}(", self.c_type()) - } else { - String::from("") - }, - close = if self.is_simd() { ")" } else { "" }, - ) - } } impl ArmIntrinsicType { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 
8ae9869db0..04729cd714 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,4 +1,5 @@ -use super::cli::Language; +use itertools::Itertools; + use super::constraint::Constraint; use super::gen_rust::PASSES; use super::indentation::Indentation; @@ -50,32 +51,15 @@ where self.constraint.is_some() } - /// The binding keyword (e.g. "const" or "let") for the array of possible test inputs. - fn rust_vals_array_binding(&self) -> impl std::fmt::Display { - if self.ty.is_rust_vals_array_const() { - "const" - } else { - "let" - } - } - /// The name (e.g. "A_VALS" or "a_vals") for the array of possible test inputs. pub(crate) fn rust_vals_array_name(&self) -> impl std::fmt::Display { - if self.ty.is_rust_vals_array_const() { - let loads = crate::common::gen_rust::PASSES; - format!( - "{}_{ty}_{load_size}", - self.name.to_uppercase(), - ty = self.ty.rust_scalar_type(), - load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1, - ) - } else { - format!("{}_vals", self.name.to_lowercase()) - } - } - - fn as_call_param_c(&self) -> String { - self.ty.as_call_param_c(&self.generate_name()) + let loads = crate::common::gen_rust::PASSES; + format!( + "{}_{ty}_{load_size}", + self.name.to_uppercase(), + ty = self.ty.rust_scalar_type(), + load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1, + ) } } @@ -88,13 +72,40 @@ impl ArgumentList where T: IntrinsicTypeDefinition, { - /// Converts the argument list into the call parameters for a C function call. - /// e.g. 
this would generate something like `a, &b, c` - pub fn as_call_param_c(&self) -> String { + pub fn as_non_imm_arglist_c(&self) -> String { self.iter() - .map(|arg| arg.as_call_param_c()) - .collect::>() - .join(", ") + .filter(|arg| !arg.has_constraint()) + .format_with("", |arg, fmt| { + fmt(&format_args!(", const {}* {}", arg.to_c_type(), arg.name)) + }) + .to_string() + } + + pub fn as_non_imm_arglist_rust(&self) -> String { + self.iter() + .filter(|arg| !arg.has_constraint()) + .format_with("", |arg, fmt| { + fmt(&format_args!( + ", {}: *const {}", + arg.name, + arg.ty.rust_type() + )) + }) + .to_string() + } + + pub fn as_call_params_c(&self, imm_args: &[i64]) -> String { + let mut imm_args = imm_args.iter(); + self.iter() + .format_with(", ", |arg, fmt| { + if arg.has_constraint() { + fmt(&imm_args.next().unwrap()) + } else { + fmt(&"*")?; + fmt(&arg.name) + } + }) + .to_string() } /// Converts the argument list into the call parameters for a Rust function. @@ -103,52 +114,14 @@ where self.iter() .filter(|a| !a.has_constraint()) .map(|arg| arg.generate_name() + " as _") - .collect::>() .join(", ") } - /// Creates a line for each argument that initializes an array for C from which `loads` argument - /// values can be loaded as a sliding window. - /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2. - pub fn gen_arglists_c( - &self, - w: &mut impl std::io::Write, - indentation: Indentation, - loads: u32, - ) -> std::io::Result<()> { - for arg in self.iter().filter(|&arg| !arg.has_constraint()) { - // Setting the variables on an aligned boundary to make it easier to pick - // functions (of a specific architecture) that would help load the values. - writeln!( - w, - "{indentation}alignas(64) const {ty} {name}_vals[] = {values};", - ty = arg.ty.c_scalar_type(), - name = arg.generate_name(), - values = arg.ty.populate_random(indentation, loads, &Language::C) - )? 
- } - - Ok(()) - } - - /// Creates a line for each argument that initializes an array for Rust from which `loads` argument - /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];` - pub fn gen_arglists_rust( - &self, - w: &mut impl std::io::Write, - indentation: Indentation, - loads: u32, - ) -> std::io::Result<()> { - for arg in self.iter().filter(|&arg| !arg.has_constraint()) { - // Constants are defined globally. - if arg.ty.is_rust_vals_array_const() { - continue; - } - - Self::gen_arg_rust(arg, w, indentation, loads)?; - } - - Ok(()) + pub fn as_c_call_param_rust(&self) -> String { + self.iter() + .filter(|a| !a.has_constraint()) + .map(|arg| format!(", &raw const {} as _", arg.generate_name())) + .join("") } pub fn gen_arg_rust( @@ -159,39 +132,14 @@ where ) -> std::io::Result<()> { writeln!( w, - "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};\n", - bind = arg.rust_vals_array_binding(), + "{indentation}static {name}: [{ty}; {load_size}] = {values};\n", name = arg.rust_vals_array_name(), ty = arg.ty.rust_scalar_type(), load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, - values = arg.ty.populate_random(indentation, loads, &Language::Rust) + values = arg.ty.populate_random(indentation, loads) ) } - /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at - /// an offset `i` using a load intrinsic, in C. 
- /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);` - /// - /// ARM-specific - pub fn load_values_c(&self, indentation: Indentation) -> String { - self.iter() - .filter(|&arg| !arg.has_constraint()) - .enumerate() - .map(|(idx, arg)| { - format!( - "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[(i+{idx}) % {PASSES}]));\n", - ty = arg.to_c_type(), - name = arg.generate_name(), - load = if arg.is_simd() { - arg.ty.get_load_function(Language::C) - } else { - "*".to_string() - } - ) - }) - .collect() - } - /// Creates a line for each argument that initializes the argument from array `[ARG]_VALS` at /// an offset `i` using a load intrinsic, in Rust. /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));` @@ -205,7 +153,7 @@ where "{indentation}let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", name = arg.generate_name(), vals_name = arg.rust_vals_array_name(), - load = arg.ty.get_load_function(Language::Rust), + load = arg.ty.get_load_function(), ) } else { format!( diff --git a/crates/intrinsic-test/src/common/cli.rs b/crates/intrinsic-test/src/common/cli.rs index bed8259de8..f407b5ceb7 100644 --- a/crates/intrinsic-test/src/common/cli.rs +++ b/crates/intrinsic-test/src/common/cli.rs @@ -1,12 +1,6 @@ use itertools::Itertools; use std::path::PathBuf; -#[derive(Debug, PartialEq)] -pub enum Language { - Rust, - C, -} - /// Intrinsic test tool #[derive(clap::Parser)] #[command( @@ -17,41 +11,13 @@ pub struct Cli { /// The input file containing the intrinsics pub input: PathBuf, - /// The rust toolchain to use for building the rust code - #[arg(long)] - pub toolchain: Option, - - /// The C++ compiler to use for compiling the c++ code - #[arg(long, default_value_t = String::from("clang++"))] - pub cppcompiler: String, - - /// Run the C programs under emulation with this command - #[arg(long)] - pub runner: Option, - /// Filename for a list of intrinsics to skip (one per line) #[arg(long)] pub skip: Option, - /// Regenerate test programs, but 
don't build or run them - #[arg(long)] - pub generate_only: bool, - /// Pass a target the test suite - #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))] - pub target: String, - - /// Pass a profile (release, dev) - #[arg(long, default_value_t = String::from("release"))] - pub profile: String, - - /// Set the linker - #[arg(long)] - pub linker: Option, - - /// Set the sysroot for the C++ compiler #[arg(long)] - pub cxx_toolchain_dir: Option, + pub target: String, #[arg(long, default_value_t = 100u8)] pub sample_percentage: u8, @@ -59,13 +25,7 @@ pub struct Cli { pub struct ProcessedCli { pub filename: PathBuf, - pub toolchain: Option, - pub cpp_compiler: Option, - pub runner: String, pub target: String, - pub profile: String, - pub linker: Option, - pub cxx_toolchain_dir: Option, pub skip: Vec, pub sample_percentage: u8, } @@ -73,11 +33,7 @@ pub struct ProcessedCli { impl ProcessedCli { pub fn new(cli_options: Cli) -> Self { let filename = cli_options.input; - let runner = cli_options.runner.unwrap_or_default(); let target = cli_options.target; - let profile = cli_options.profile; - let linker = cli_options.linker; - let cxx_toolchain_dir = cli_options.cxx_toolchain_dir; let sample_percentage = cli_options.sample_percentage; let skip = if let Some(filename) = cli_options.skip { @@ -91,27 +47,8 @@ impl ProcessedCli { Default::default() }; - let (toolchain, cpp_compiler) = if cli_options.generate_only { - (None, None) - } else { - ( - Some( - cli_options - .toolchain - .map_or_else(String::new, |t| format!("+{t}")), - ), - Some(cli_options.cppcompiler), - ) - }; - Self { - toolchain, - cpp_compiler, - runner, target, - profile, - linker, - cxx_toolchain_dir, skip, filename, sample_percentage, diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs deleted file mode 100644 index c1438d1bbf..0000000000 --- a/crates/intrinsic-test/src/common/compare.rs +++ /dev/null @@ -1,144 +0,0 @@ -use 
itertools::Itertools; -use rayon::prelude::*; -use std::{collections::HashMap, process::Command}; - -pub const INTRINSIC_DELIMITER: &str = "############"; -fn runner_command(runner: &str) -> Command { - let mut it = runner.split_whitespace(); - let mut cmd = Command::new(it.next().unwrap()); - cmd.args(it); - - cmd -} - -pub fn compare_outputs( - intrinsic_name_list: &Vec, - runner: &str, - target: &str, - profile: &str, -) -> bool { - let profile_dir = match profile { - "dev" => "debug", - _ => "release", - }; - - let (c, rust) = rayon::join( - || { - runner_command(runner) - .arg("./intrinsic-test-programs") - .current_dir("c_programs") - .output() - }, - || { - runner_command(runner) - .arg(format!( - "./target/{target}/{profile_dir}/intrinsic-test-programs" - )) - .current_dir("rust_programs") - .output() - }, - ); - let (c, rust) = match (c, rust) { - (Ok(c), Ok(rust)) => (c, rust), - failure => panic!("Failed to run: {failure:#?}"), - }; - - if !c.status.success() { - error!( - "Failed to run C program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&c.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), - ); - } - - if !rust.status.success() { - error!( - "Failed to run Rust program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), - ); - } - - info!("Completed running C++ and Rust test binaries"); - let c = std::str::from_utf8(&c.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); - let rust = std::str::from_utf8(&rust.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); - - let c_output_map = c - .split(INTRINSIC_DELIMITER) - .filter_map(|output| output.trim().split_once("\n")) - .collect::>(); - let rust_output_map = rust - .split(INTRINSIC_DELIMITER) - .filter_map(|output| output.trim().split_once("\n")) - .collect::>(); - - assert!(!c_output_map.is_empty(), "No C intrinsic 
output found!"); - - let intrinsics = c_output_map - .keys() - .chain(rust_output_map.keys()) - .unique() - .collect_vec(); - - info!("Comparing outputs"); - let intrinsics_diff_count = intrinsics - .par_iter() - .filter_map(|&&intrinsic| { - let c_output = c_output_map.get(intrinsic).unwrap(); - let rust_output = rust_output_map.get(intrinsic).unwrap(); - if rust_output.eq(c_output) { - None - } else { - let diff = diff::lines(c_output, rust_output); - let diffs = diff - .into_iter() - .filter_map(|diff| match diff { - diff::Result::Left(_) | diff::Result::Right(_) => Some(diff), - diff::Result::Both(_, _) => None, - }) - .collect_vec(); - if diffs.len() > 0 { - Some((intrinsic, diffs)) - } else { - None - } - } - }) - .inspect(|(intrinsic, diffs)| { - use std::io::Write; - - let stdout = std::io::stdout(); - let mut out = stdout.lock(); - - writeln!(out, "Difference for intrinsic: {intrinsic}").unwrap(); - diffs.into_iter().for_each(|diff| match diff { - diff::Result::Left(c) => { - writeln!(out, "C: {c}").unwrap(); - } - diff::Result::Right(rust) => { - writeln!(out, "Rust: {rust}").unwrap(); - } - _ => (), - }); - writeln!( - out, - "****************************************************************" - ) - .unwrap(); - }) - .count(); - - println!( - "{} differences found (tested {} intrinsics)", - intrinsics_diff_count, - intrinsic_name_list.len() - ); - - intrinsics_diff_count == 0 -} diff --git a/crates/intrinsic-test/src/common/compile_c.rs b/crates/intrinsic-test/src/common/compile_c.rs deleted file mode 100644 index fa78b332a7..0000000000 --- a/crates/intrinsic-test/src/common/compile_c.rs +++ /dev/null @@ -1,136 +0,0 @@ -#[derive(Clone)] -pub struct CompilationCommandBuilder { - compiler: String, - target: Option, - cxx_toolchain_dir: Option, - arch_flags: Vec, - optimization: String, - project_root: Option, - extra_flags: Vec, -} - -impl CompilationCommandBuilder { - pub fn new() -> Self { - Self { - compiler: String::new(), - target: None, - 
cxx_toolchain_dir: None, - arch_flags: Vec::new(), - optimization: "2".to_string(), - project_root: None, - extra_flags: Vec::new(), - } - } - - pub fn set_compiler(mut self, compiler: &str) -> Self { - self.compiler = compiler.to_string(); - self - } - - pub fn set_target(mut self, target: &str) -> Self { - self.target = Some(target.to_string()); - self - } - - pub fn set_cxx_toolchain_dir(mut self, path: Option<&str>) -> Self { - self.cxx_toolchain_dir = path.map(|p| p.to_string()); - self - } - - pub fn add_arch_flags<'a>(mut self, flags: impl IntoIterator) -> Self { - self.arch_flags - .extend(flags.into_iter().map(|s| s.to_owned())); - - self - } - - pub fn set_opt_level(mut self, optimization: &str) -> Self { - self.optimization = optimization.to_string(); - self - } - - /// Sets the root path of all the generated test files. - pub fn set_project_root(mut self, path: &str) -> Self { - self.project_root = Some(path.to_string()); - self - } - - pub fn add_extra_flags<'a>(mut self, flags: impl IntoIterator) -> Self { - self.extra_flags - .extend(flags.into_iter().map(|s| s.to_owned())); - - self - } - - pub fn add_extra_flag(self, flag: &str) -> Self { - self.add_extra_flags([flag]) - } -} - -impl CompilationCommandBuilder { - pub fn into_cpp_compilation(self) -> CppCompilation { - let mut cpp_compiler = std::process::Command::new(self.compiler); - - if let Some(project_root) = self.project_root { - cpp_compiler.current_dir(project_root); - } - - let flags = std::env::var("CPPFLAGS").unwrap_or("".into()); - cpp_compiler.args(flags.split_whitespace()); - - cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+"))); - - cpp_compiler.arg(format!("-O{}", self.optimization)); - - cpp_compiler.args(self.extra_flags); - - if let Some(target) = &self.target { - cpp_compiler.arg(format!("--target={target}")); - } - - CppCompilation(cpp_compiler) - } -} - -pub struct CppCompilation(std::process::Command); - -fn clone_command(command: &std::process::Command) -> 
std::process::Command { - let mut cmd = std::process::Command::new(command.get_program()); - if let Some(current_dir) = command.get_current_dir() { - cmd.current_dir(current_dir); - } - cmd.args(command.get_args()); - - for (key, val) in command.get_envs() { - cmd.env(key, val.unwrap_or_default()); - } - - cmd -} - -impl CppCompilation { - pub fn command_mut(&mut self) -> &mut std::process::Command { - &mut self.0 - } - - pub fn compile_object_file( - &self, - input: &str, - output: &str, - ) -> std::io::Result { - let mut cmd = clone_command(&self.0); - cmd.args([input, "-v", "-c", "-o", output]); - cmd.output() - } - - pub fn link_executable( - &self, - inputs: impl Iterator, - output: &str, - ) -> std::io::Result { - let mut cmd = clone_command(&self.0); - cmd.args(inputs); - cmd.args(["-o", output]); - cmd.output() - } -} diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index a95b4c36b7..bdf6f68d58 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -1,166 +1,42 @@ +use itertools::Itertools; + use crate::common::intrinsic::Intrinsic; -use super::argument::Argument; -use super::compare::INTRINSIC_DELIMITER; -use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; -// The number of times each intrinsic will be called. 
-const PASSES: u32 = 20; -const COMMON_HEADERS: [&str; 7] = [ - "iostream", - "string", - "cstring", - "iomanip", - "sstream", - "type_traits", - "cassert", -]; - -pub fn generate_c_test_loop( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, - indentation: Indentation, - additional: &str, - passes: u32, -) -> std::io::Result<()> { - let body_indentation = indentation.nested(); - writeln!( - w, - "{indentation}for (int i=0; i<{passes}; i++) {{\n\ - {loaded_args}\ - {body_indentation}auto __return_value = {intrinsic_call}({args});\n\ - {print_result}\n\ - {indentation}}}", - loaded_args = intrinsic.arguments.load_values_c(body_indentation), - intrinsic_call = intrinsic.name, - args = intrinsic.arguments.as_call_param_c(), - print_result = intrinsic - .results - .print_result_c(body_indentation, additional) - ) -} - -pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, - indentation: Indentation, - constraints: &mut (impl Iterator> + Clone), - name: String, -) -> std::io::Result<()> { - let Some(current) = constraints.next() else { - return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES); - }; - - let body_indentation = indentation.nested(); - for i in current.constraint.iter().flat_map(|c| c.iter()) { - let ty = current.ty.c_type(); - - writeln!(w, "{indentation}{{")?; - - // TODO: Move to actually specifying the enum value - // instead of typecasting integers, for better clarity - // of generated code. 
- writeln!( - w, - "{body_indentation}const {ty} {} = ({ty}){i};", - current.generate_name() - )?; - - generate_c_constraint_blocks( - w, - intrinsic, - body_indentation, - &mut constraints.clone(), - format!("{name}-{i}"), - )?; - - writeln!(w, "{indentation}}}")?; - } - - Ok(()) -} - -// Compiles C test programs using specified compiler -pub fn create_c_test_function( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, -) -> std::io::Result<()> { - let indentation = Indentation::default(); - - writeln!(w, "int run_{}() {{", intrinsic.name)?; - - // Define the arrays of arguments. - let arguments = &intrinsic.arguments; - arguments.gen_arglists_c(w, indentation.nested(), PASSES)?; - - generate_c_constraint_blocks( - w, - intrinsic, - indentation.nested(), - &mut arguments.iter().rev().filter(|&i| i.has_constraint()), - Default::default(), - )?; - - writeln!(w, " return 0;")?; - writeln!(w, "}}")?; - - Ok(()) -} - -pub fn write_mod_cpp( +pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, platform_headers: &[&str], - forward_declarations: &str, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!(w, "{notice}")?; - for header in COMMON_HEADERS.iter().chain(platform_headers.iter()) { - writeln!(w, "#include <{header}>")?; - } - - writeln!(w, "{}", forward_declarations)?; - - for intrinsic in intrinsics { - create_c_test_function(w, intrinsic)?; - } - - Ok(()) -} + writeln!(w, "#include ")?; + writeln!(w, "#include ")?; -pub fn write_main_cpp<'a>( - w: &mut impl std::io::Write, - arch_specific_definitions: &str, - arch_specific_headers: &[&str], - intrinsics: impl Iterator + Clone, -) -> std::io::Result<()> { - for header in COMMON_HEADERS.iter().chain(arch_specific_headers.iter()) { + for header in platform_headers { writeln!(w, "#include <{header}>")?; } - // NOTE: It's assumed that this value contains the required `ifdef`s. 
- writeln!(w, "{arch_specific_definitions }")?; - - for intrinsic in intrinsics.clone() { - writeln!(w, "extern int run_{intrinsic}(void);")?; - } - - writeln!(w, "int main(int argc, char **argv) {{")?; - for intrinsic in intrinsics { - writeln!( - w, - " std::cout << \"{INTRINSIC_DELIMITER}\" << std::endl;" - )?; - writeln!(w, " std::cout << \"{intrinsic}\" << std::endl;")?; - writeln!(w, " run_{intrinsic}();\n")?; + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " +void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{ + *__dst = {name}({params}); +}}", + return_ty = intrinsic.results.c_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_c(), + params = intrinsic.arguments.as_call_params_c(&imm_values) + ) + })?; } - writeln!(w, " return 0;")?; - - writeln!(w, "}}")?; - Ok(()) } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 82b97701bb..d11bcbdce6 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -1,23 +1,53 @@ use itertools::Itertools; -use std::process::Command; -use super::compare::INTRINSIC_DELIMITER; use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; use crate::common::argument::ArgumentList; use crate::common::intrinsic::Intrinsic; +use crate::common::intrinsic_helpers::TypeKind; // The number of times each intrinsic will be called. pub(crate) const PASSES: u32 = 20; +const COMMON_RUST_DEFINITIONS: &str = r#" +macro_rules! 
make_nice { + ($($wrapper:ident ($inner:ty)),*) => {$( + #[derive(Debug, Copy, Clone)] + #[repr(transparent)] + pub struct $wrapper($inner); + + impl PartialEq for $wrapper { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 || (self.0.is_nan() && other.0.is_nan()) + } + } + + impl Eq for $wrapper {} + )*} +} + +make_nice!(NiceF16(f16), NiceF32(f32), NiceF64(f64)); +"#; + macro_rules! concatln { ($($lines:expr),* $(,)?) => { concat!($( $lines, "\n" ),*) }; } -fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { - writeln!( +pub fn write_bin_cargo_toml( + w: &mut impl std::io::Write, + module_count: usize, +) -> std::io::Result<()> { + write!(w, concatln!("[workspace]", "members = ["))?; + for i in 0..module_count { + writeln!(w, " \"mod_{i}\",")?; + } + writeln!(w, "]") +} + +pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { + write!( w, concatln!( "[package]", @@ -26,6 +56,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io:: "authors = [{authors}]", "license = \"{license}\"", "edition = \"2018\"", + "", + "[dependencies]", + "core_arch = {{ path = \"../../crates/core_arch\" }}", + "", + "[build-dependencies]", + "cc = \"1\"" ), name = name, version = env!("CARGO_PKG_VERSION"), @@ -36,72 +72,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io:: ) } -pub fn write_bin_cargo_toml( - w: &mut impl std::io::Write, - module_count: usize, -) -> std::io::Result<()> { - write_cargo_toml_header(w, "intrinsic-test-programs")?; - - writeln!(w, "[dependencies]")?; - writeln!(w, "core_arch = {{ path = \"../crates/core_arch\" }}")?; - - for i in 0..module_count { - writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?; - } - - Ok(()) -} - -pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { - write_cargo_toml_header(w, name)?; - - writeln!(w, "[dependencies]")?; - writeln!(w, 
"core_arch = {{ path = \"../../crates/core_arch\" }}")?; - - Ok(()) -} - -pub fn write_main_rs<'a>( - w: &mut impl std::io::Write, - chunk_count: usize, - cfg: &str, - definitions: &str, - intrinsics: impl Iterator + Clone, -) -> std::io::Result<()> { - writeln!(w, "#![feature(simd_ffi)]")?; - writeln!(w, "#![feature(f16)]")?; - writeln!(w, "#![allow(unused)]")?; - - // Cargo will spam the logs if these warnings are not silenced. - writeln!(w, "#![allow(non_upper_case_globals)]")?; - writeln!(w, "#![allow(non_camel_case_types)]")?; - writeln!(w, "#![allow(non_snake_case)]")?; - - writeln!(w, "{cfg}")?; - writeln!(w, "{definitions}")?; - - for module in 0..chunk_count { - writeln!(w, "use mod_{module}::*;")?; - } - - writeln!(w, "fn main() {{")?; - - for binary in intrinsics { - writeln!(w, " println!(\"{INTRINSIC_DELIMITER}\");")?; - writeln!(w, " println!(\"{binary}\");")?; - writeln!(w, " run_{binary}();\n")?; - } - - writeln!(w, "}}")?; - - Ok(()) -} - pub fn write_lib_rs( w: &mut impl std::io::Write, notice: &str, cfg: &str, definitions: &str, + i: usize, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!(w, "{notice}")?; @@ -117,13 +93,15 @@ pub fn write_lib_rs( writeln!(w, "{cfg}")?; + writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?; + writeln!(w, "{definitions}")?; let mut seen = std::collections::HashSet::new(); for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { - if !arg.has_constraint() && arg.ty.is_rust_vals_array_const() { + if !arg.has_constraint() { let name = arg.rust_vals_array_name().to_string(); if seen.insert(name) { @@ -133,190 +111,189 @@ pub fn write_lib_rs( } } + write_bindings_rust(w, i, intrinsics)?; + for intrinsic in intrinsics { - crate::common::gen_rust::create_rust_test_module(w, intrinsic)?; + create_rust_test(w, intrinsic)?; } Ok(()) } -pub fn compile_rust_programs( - toolchain: Option<&str>, - target: &str, - profile: &str, - linker: Option<&str>, -) -> bool { - /* If there has been a linker explicitly set 
from the command line then - * we want to set it via setting it in the RUSTFLAGS*/ - - // This is done because `toolchain` is None when - // the --generate-only flag is passed - if toolchain.is_none() { - return true; - } - - trace!("Building cargo command"); - - let mut cargo_command = Command::new("cargo"); - cargo_command.current_dir("rust_programs"); - - // Do not use the target directory of the workspace please. - cargo_command.env("CARGO_TARGET_DIR", "target"); - - if toolchain.is_some_and(|val| !val.is_empty()) { - cargo_command.arg(toolchain.unwrap()); - } - cargo_command.args(["build", "--target", target, "--profile", profile]); - - let mut rust_flags = "-Cdebuginfo=0".to_string(); - if let Some(linker) = linker { - rust_flags.push_str(" -C linker="); - rust_flags.push_str(linker); - rust_flags.push_str(" -C link-args=-static"); - - cargo_command.env("CPPFLAGS", "-fuse-ld=lld"); - } - - cargo_command.env("RUSTFLAGS", rust_flags); - - trace!("running cargo"); - - if log::log_enabled!(log::Level::Trace) { - cargo_command.stdout(std::process::Stdio::inherit()); - cargo_command.stderr(std::process::Stdio::inherit()); - } - - let output = cargo_command.output(); - trace!("cargo is done"); - - if let Ok(output) = output { - if output.status.success() { - true - } else { - error!( - "Failed to compile code for rust intrinsics\n\nstdout:\n{}\n\nstderr:\n{}", - std::str::from_utf8(&output.stdout).unwrap_or(""), - std::str::from_utf8(&output.stderr).unwrap_or("") - ); - false - } - } else { - error!("Command failed: {output:#?}"); - false - } -} - -pub fn generate_rust_test_loop( +fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &Intrinsic, - indentation: Indentation, - specializations: &[Vec], passes: u32, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; // Each function (and each specialization) has its own type. Erase that type with a cast. 
- let mut coerce = String::from("unsafe fn("); + let mut coerce = String::from("fn("); + let mut c_coerce = String::from("fn(_, "); for _ in intrinsic.arguments.iter().filter(|a| !a.has_constraint()) { coerce += "_, "; + c_coerce += "_, "; } coerce += ") -> _"; - - match specializations { - [] => { - writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; - } - [const_args] if const_args.is_empty() => { - writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; - } - _ => { - writeln!(w, " let specializations = [")?; - - for specialization in specializations { - let mut specialization: Vec<_> = - specialization.iter().map(|d| d.to_string()).collect(); - - let const_args = specialization.join(","); - - // The identifier is reversed. - specialization.reverse(); - let id = specialization.join("-"); - - writeln!( - w, - " (\"-{id}\", {intrinsic_name}::<{const_args}> as {coerce})," - )?; - } - - writeln!(w, " ];")?; - } + c_coerce += ")"; + + if intrinsic + .arguments + .iter() + .filter(|arg| arg.has_constraint()) + .count() + == 0 + { + writeln!( + w, + " let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];" + )?; + } else { + writeln!(w, " let specializations = [")?; + + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),", + const_args = imm_values.iter().join(","), + c_const_args = imm_values.iter().join("_"), + ) + })?; + + writeln!(w, " ];")?; } + let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() { + ( + format!( + "std::mem::transmute::<_, [{}; {}]>(", + intrinsic.results.rust_scalar_type().replace("f", "NiceF"), + intrinsic.results.num_lanes() * intrinsic.results.num_vectors() + ), + ")", + ) + } else if intrinsic.results.kind == TypeKind::Float { + ( + match intrinsic.results.inner_size() { + 16 => format!("NiceF16("), + 32 => 
format!("NiceF32("), + 64 => format!("NiceF64("), + _ => unimplemented!(), + }, + ")", + ) + } else { + ("".to_string(), "") + }; + write!( w, concatln!( - " for (id, f) in specializations {{", + " for (id, rust, c) in specializations {{", " for i in 0..{passes} {{", " unsafe {{", "{loaded_args}", - " let __return_value = f({args});", - " println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});", + " let __rust_return_value = rust({rust_args});", + "", + " let mut __c_return_value = std::mem::MaybeUninit::uninit();", + " c(__c_return_value.as_mut_ptr(){c_args});", + " let __c_return_value = __c_return_value.assume_init();", + "", + " assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");", " }}", " }}", " }}", ), - loaded_args = intrinsic.arguments.load_values_rust(indentation.nest_by(4)), - args = intrinsic.arguments.as_call_param_rust(), - return_value = intrinsic.results.print_result_rust(), + loaded_args = intrinsic + .arguments + .load_values_rust(Indentation::default().nest_by(4)), + rust_args = intrinsic.arguments.as_call_param_rust(), + c_args = intrinsic.arguments.as_c_call_param_rust(), passes = passes, + cast_prefix = cast_prefix, + cast_suffix = cast_suffix, ) } -/// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic. 
-fn generate_rust_specializations( - constraints: &mut impl Iterator>, -) -> Vec> { - let mut specializations = vec![vec![]]; - - for constraint in constraints { - specializations = constraint - .flat_map(|right| { - specializations.iter().map(move |left| { - let mut left = left.clone(); - left.push(i32::try_from(right).unwrap()); - left - }) - }) - .collect(); - } +fn create_rust_test( + w: &mut impl std::io::Write, + intrinsic: &Intrinsic, +) -> std::io::Result<()> { + trace!("generating `{}`", intrinsic.name); + + write!( + w, + concatln!("#[test]", "fn test_{intrinsic_name}() {{"), + intrinsic_name = intrinsic.name, + )?; + + generate_rust_test_loop(w, intrinsic, PASSES)?; + + writeln!(w, "}}")?; - specializations + Ok(()) } -// Top-level function to create complete test program -pub fn create_rust_test_module( +pub fn write_bindings_rust( w: &mut impl std::io::Write, - intrinsic: &Intrinsic, + i: usize, + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - trace!("generating `{}`", intrinsic.name); - let indentation = Indentation::default(); + writeln!( + w, + concatln!( + "#[allow(improper_ctypes)]", + "#[link(name = \"wrapper_{i}\")]", + "unsafe extern \"C\" {{" + ), + i = i + )?; - writeln!(w, "pub fn run_{}() {{", intrinsic.name)?; + for intrinsic in intrinsics { + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + return_ty = intrinsic.results.rust_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_rust(), + ) + })?; + } - // Define the arrays of arguments. - let arguments = &intrinsic.arguments; - arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?; + writeln!(w, "}}") +} - // Define any const generics as `const` items, then generate the actual test loop. 
- let specializations = generate_rust_specializations( - &mut arguments - .iter() - .filter_map(|i| i.constraint.as_ref().map(|v| v.iter())), - ); +pub fn write_build_rs( + w: &mut impl std::io::Write, + i: usize, + arch_flags: &[&str], +) -> std::io::Result<()> { + const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-ffp-model=strict", "-Wno-narrowing"]; - generate_rust_test_loop(w, intrinsic, indentation, &specializations, PASSES)?; + write!( + w, + concatln!( + "fn main() {{", + " cc::Build::new()", + " .file(\"../../c_programs/wrapper_{i}.c\")", + " .opt_level(2)", + " .flags(&[", + ), + i = i + )?; - writeln!(w, "}}")?; + let indentation = Indentation::default().nest_by(2); + for flag in COMMON_FLAGS.iter().chain(arch_flags) { + writeln!(w, "{indentation}\"{flag}\",")?; + } - Ok(()) + write!( + w, + concatln!(" ])", " .compile(\"wrapper_{i}\");", "}}"), + i = i + ) } diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index 81f6d6d8b5..76e5959153 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,3 +1,5 @@ +use crate::common::constraint::Constraint; + use super::argument::ArgumentList; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -16,3 +18,36 @@ pub struct Intrinsic { /// Any architecture-specific tags. 
pub arch_tags: Vec, } + +fn recurse_specializations<'a, E>( + constraints: &mut (impl Iterator + Clone), + imm_values: &mut Vec, + f: &mut impl FnMut(&[i64]) -> Result<(), E>, +) -> Result<(), E> { + if let Some(current) = constraints.next() { + for i in current.iter() { + imm_values.push(i); + recurse_specializations(&mut constraints.clone(), imm_values, f)?; + imm_values.pop(); + } + Ok(()) + } else { + f(&imm_values) + } +} + +impl Intrinsic { + pub fn iter_specializations( + &self, + mut f: impl FnMut(&[i64]) -> Result<(), E>, + ) -> Result<(), E> { + recurse_specializations( + &mut self + .arguments + .iter() + .filter_map(|arg| arg.constraint.as_ref()), + &mut Vec::new(), + &mut f, + ) + } +} diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index a14d7ef05f..06512801ce 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -5,7 +5,6 @@ use std::str::FromStr; use itertools::Itertools as _; -use super::cli::Language; use super::indentation::Indentation; use super::values::value_for_array; @@ -94,6 +93,7 @@ impl TypeKind { Self::Poly => "u", Self::Char(Sign::Unsigned) => "u", Self::Char(Sign::Signed) => "i", + Self::Mask => "u", _ => unreachable!("Unused type kind: {self:#?}"), } } @@ -154,67 +154,7 @@ impl IntrinsicType { self.ptr } - pub fn c_scalar_type(&self) -> String { - match self.kind() { - TypeKind::Char(_) => String::from("char"), - TypeKind::Vector => String::from("int32_t"), - _ => format!( - "{prefix}{bits}_t", - prefix = self.kind().c_prefix(), - bits = self.inner_size() - ), - } - } - - pub fn c_promotion(&self) -> &str { - match *self { - IntrinsicType { - kind, - bit_len: Some(8), - .. 
- } => match kind { - TypeKind::Int(Sign::Signed) => "int", - TypeKind::Int(Sign::Unsigned) => "unsigned int", - TypeKind::Poly => "uint8_t", - _ => "", - }, - IntrinsicType { - kind: TypeKind::Poly, - bit_len: Some(bit_len), - .. - } => match bit_len { - 8 => unreachable!("handled above"), - 16 => "uint16_t", - 32 => "uint32_t", - 64 => "uint64_t", - 128 => "", - _ => panic!("invalid bit_len"), - }, - IntrinsicType { - kind: TypeKind::Float, - bit_len: Some(bit_len), - .. - } => match bit_len { - 16 => "float16_t", - 32 => "float", - 64 => "double", - 128 => "", - _ => panic!("invalid bit_len"), - }, - IntrinsicType { - kind: TypeKind::Char(_), - .. - } => "char", - _ => "", - } - } - - pub fn populate_random( - &self, - indentation: Indentation, - loads: u32, - language: &Language, - ) -> String { + pub fn populate_random(&self, indentation: Indentation, loads: u32) -> String { match self { IntrinsicType { bit_len: Some(bit_len @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 16 | 32 | 64)), @@ -224,13 +164,9 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, suffix) = match language { - Language::Rust => ('[', ']'), - Language::C => ('{', '}'), - }; let body_indentation = indentation.nested(); format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| { let src = value_for_array(*bit_len, i); @@ -241,13 +177,7 @@ impl IntrinsicType { let mask = !0u64 >> (64 - *bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - if (twos_compl == src) && (language == &Language::C) { - // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid - // undefined literal overflow behaviour. 
- fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1")) - } else { - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) - } + fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) } else { fmt(&format_args!("{body_indentation}{src:#x}")) } @@ -261,20 +191,11 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) { - (&Language::Rust, 16) => ('[', "f16::from_bits(", ")", ']'), - (&Language::Rust, 32) => ('[', "f32::from_bits(", ")", ']'), - (&Language::Rust, 64) => ('[', "f64::from_bits(", ")", ']'), - (&Language::C, 16) => ('{', "cast(", ")", '}'), - (&Language::C, 32) => ('{', "cast(", ")", '}'), - (&Language::C, 64) => ('{', "cast(", ")", '}'), - _ => unreachable!(), - }; format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| fmt(&format_args!( - "{indentation}{cast_prefix}{src:#x}{cast_suffix}", + "{indentation}f{bit_len}::from_bits({src:#x})", indentation = indentation.nested(), src = value_for_array(*bit_len, i) ))) @@ -287,14 +208,10 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, suffix) = match language { - Language::Rust => ('[', ']'), - Language::C => ('{', '}'), - }; let body_indentation = indentation.nested(); let effective_bit_len = 32; format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(vec_len.unwrap_or(1) * simd_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| { let src = value_for_array(effective_bit_len, i); @@ -304,13 +221,7 @@ impl IntrinsicType { let mask = !0u64 >> (64 - effective_bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - if (twos_compl == src) && (language == &Language::C) { - // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid - // undefined literal overflow behaviour. 
- fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1")) - } else { - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) - } + fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) } else { fmt(&format_args!("{body_indentation}{src:#x}")) } @@ -320,57 +231,20 @@ impl IntrinsicType { _ => unimplemented!("populate random: {self:#?}"), } } - - pub fn is_rust_vals_array_const(&self) -> bool { - match self { - // Floats have to be loaded at runtime for stable NaN conversion. - IntrinsicType { - kind: TypeKind::Float, - .. - } => false, - IntrinsicType { - kind: TypeKind::Int(_) | TypeKind::Poly, - .. - } => true, - _ => true, - } - } - - pub fn as_call_param_c(&self, name: &String) -> String { - if self.ptr { - format!("&{name}") - } else { - name.clone() - } - } } pub trait IntrinsicTypeDefinition: Deref { /// Determines the load function for this type. /// can be implemented in an `impl` block - fn get_load_function(&self, _language: Language) -> String; - - /// can be implemented in an `impl` block - fn get_lane_function(&self) -> String; + fn get_load_function(&self) -> String; /// Gets a string containing the typename for this type in C format. /// can be directly defined in `impl` blocks fn c_type(&self) -> String; + /// Gets a string containing the typename for this type in Rust format. /// can be directly defined in `impl` blocks - fn c_single_vector_type(&self) -> String; - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String; - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. 
- fn print_result_rust(&self) -> String { - String::from("format_args!(\"{__return_value:.150?}\")") - } + fn rust_type(&self) -> String; /// To enable architecture-specific logic fn rust_scalar_type(&self) -> String { @@ -380,13 +254,4 @@ pub trait IntrinsicTypeDefinition: Deref { bits = self.inner_size() ) } - - fn generate_final_type_cast(&self) -> String { - let type_data = self.c_promotion(); - if type_data.len() > 2 { - format!("({type_data})") - } else { - String::new() - } - } } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index a1062b3a87..86849f7db3 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -1,38 +1,32 @@ -use std::fs::File; +use std::{fs::File, io}; use rayon::prelude::*; use cli::ProcessedCli; use crate::common::{ - compile_c::CppCompilation, - gen_c::{write_main_cpp, write_mod_cpp}, - gen_rust::{ - compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs, - write_main_rs, - }, + gen_c::write_wrapper_c, + gen_rust::{write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs}, intrinsic::Intrinsic, intrinsic_helpers::IntrinsicTypeDefinition, }; pub mod argument; pub mod cli; -pub mod compare; -pub mod compile_c; pub mod constraint; -pub mod gen_c; -pub mod gen_rust; -pub mod indentation; pub mod intrinsic; pub mod intrinsic_helpers; -pub mod values; + +mod gen_c; +mod gen_rust; +mod indentation; +mod values; /// Architectures must support this trait /// to be successfully tested. 
pub trait SupportedArchitectureTest { type IntrinsicImpl: IntrinsicTypeDefinition + Sync; - fn cli_options(&self) -> &ProcessedCli; fn intrinsics(&self) -> &[Intrinsic]; fn create(cli_options: ProcessedCli) -> Self; @@ -40,118 +34,40 @@ pub trait SupportedArchitectureTest { const NOTICE: &str; const PLATFORM_C_HEADERS: &[&str]; - const PLATFORM_C_DEFINITIONS: &str; - const PLATFORM_C_FORWARD_DECLARATIONS: &str; const PLATFORM_RUST_CFGS: &str; const PLATFORM_RUST_DEFINITIONS: &str; - fn cpp_compilation(&self) -> Option; - - fn build_c_file(&self) -> bool { - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + fn arch_flags(&self) -> Vec<&str>; - let cpp_compiler_wrapped = self.cpp_compilation(); + fn generate_c_file(&self) { + let (chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); std::fs::create_dir_all("c_programs").unwrap(); self.intrinsics() .par_chunks(chunk_size) .enumerate() .map(|(i, chunk)| { - let c_filename = format!("c_programs/mod_{i}.cpp"); + let c_filename = format!("c_programs/wrapper_{i}.c"); let mut file = File::create(&c_filename).unwrap(); - let mod_file_write_result = write_mod_cpp( - &mut file, - Self::NOTICE, - Self::PLATFORM_C_HEADERS, - Self::PLATFORM_C_FORWARD_DECLARATIONS, - chunk, - ); - - if let Err(error) = mod_file_write_result { - return Err(format!("Error writing to mod_{i}.cpp: {error:?}")); - } - - // compile this cpp file into a .o file. 
- // - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - trace!("compiling mod_{i}.cpp"); - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - let compile_output = cpp_compiler - .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o")) - .map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?; - - assert!( - compile_output.status.success(), - "{}", - String::from_utf8_lossy(&compile_output.stderr) - ); - - trace!("finished compiling mod_{i}.cpp"); - } - Ok(()) + write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk) }) - .collect::>() + .collect::>() .unwrap(); - - let mut file = File::create("c_programs/main.cpp").unwrap(); - write_main_cpp( - &mut file, - Self::PLATFORM_C_DEFINITIONS, - Self::PLATFORM_C_HEADERS, - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - // compile this cpp file into a .o file - trace!("compiling main.cpp"); - let output = cpp_compiler - .compile_object_file("main.cpp", "intrinsic-test-programs.o") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - - let object_files = (0..chunk_count) - .map(|i| format!("mod_{i}.o")) - .chain(["intrinsic-test-programs.o".to_owned()]); - - let output = cpp_compiler - .link_executable(object_files, "intrinsic-test-programs") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - } - - true } - fn build_rust_file(&self) -> bool { - std::fs::create_dir_all("rust_programs/src").unwrap(); + fn generate_rust_file(&self) { + let arch_flags = self.arch_flags(); - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + std::fs::create_dir_all("rust_programs").unwrap(); + + let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len()); let mut cargo = 
File::create("rust_programs/Cargo.toml").unwrap(); write_bin_cargo_toml(&mut cargo, chunk_count).unwrap(); - let mut main_rs = File::create("rust_programs/src/main.rs").unwrap(); - write_main_rs( - &mut main_rs, - chunk_count, - Self::PLATFORM_RUST_CFGS, - "", - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - let target = &self.cli_options().target; - let profile = &self.cli_options().profile; - let toolchain = self.cli_options().toolchain.as_deref(); - let linker = self.cli_options().linker.as_deref(); - self.intrinsics() - .par_chunks(chunk_size) + .chunks(chunk_size) .enumerate() .map(|(i, chunk)| { std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?; @@ -165,6 +81,7 @@ pub trait SupportedArchitectureTest { Self::NOTICE, Self::PLATFORM_RUST_CFGS, Self::PLATFORM_RUST_DEFINITIONS, + i, chunk, )?; @@ -174,41 +91,20 @@ pub trait SupportedArchitectureTest { write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?; + let build_rs_filename = format!("rust_programs/mod_{i}/build.rs"); + trace!("generating `{build_rs_filename}`"); + let mut file = File::create(build_rs_filename).unwrap(); + + write_build_rs(&mut file, i, &arch_flags).unwrap(); + Ok(()) }) .collect::>() .unwrap(); - - compile_rust_programs(toolchain, target, profile, linker) - } - - fn compare_outputs(&self) -> bool { - if self.cli_options().toolchain.is_some() { - let intrinsics_name_list = self - .intrinsics() - .iter() - .map(|i| i.name.clone()) - .collect::>(); - - compare::compare_outputs( - &intrinsics_name_list, - &self.cli_options().runner, - &self.cli_options().target, - &self.cli_options().profile, - ) - } else { - true - } } } -// pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) { -// let available_parallelism = std::thread::available_parallelism().unwrap().get(); -// let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count)); - -// (chunk_size, intrinsic_count.div_ceil(chunk_size)) -// } - -pub fn 
manual_chunk(intrinsic_count: usize, chunk_size: usize) -> (usize, usize) { - (chunk_size, intrinsic_count.div_ceil(chunk_size)) +pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { + let ncores = std::thread::available_parallelism().unwrap().into(); + (intrinsic_count.div_ceil(ncores), ncores) } diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index e5c846877c..9f57c99f12 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -15,27 +15,21 @@ fn main() { let args: Cli = clap::Parser::parse(); let processed_cli_options = ProcessedCli::new(args); - match processed_cli_options.target.as_str() { - "aarch64-unknown-linux-gnu" - | "armv7-unknown-linux-gnueabihf" - | "aarch64_be-unknown-linux-gnu" => run(ArmArchitectureTest::create(processed_cli_options)), - - "x86_64-unknown-linux-gnu" => run(X86ArchitectureTest::create(processed_cli_options)), - _ => std::process::exit(0), + if processed_cli_options.target.starts_with("arm") + | processed_cli_options.target.starts_with("aarch64") + { + run(ArmArchitectureTest::create(processed_cli_options)) + } else if processed_cli_options.target.starts_with("x86") { + run(X86ArchitectureTest::create(processed_cli_options)) + } else { + unimplemented!("Unsupported target {}", processed_cli_options.target) } } fn run(test_environment: impl SupportedArchitectureTest) { info!("building C binaries"); - if !test_environment.build_c_file() { - std::process::exit(2); - } + test_environment.generate_c_file(); + info!("building Rust binaries"); - if !test_environment.build_rust_file() { - std::process::exit(3); - } - info!("Running binaries"); - if !test_environment.compare_outputs() { - std::process::exit(1); - } + test_environment.generate_rust_file(); } diff --git a/crates/intrinsic-test/src/x86/compile.rs b/crates/intrinsic-test/src/x86/compile.rs deleted file mode 100644 index 65cd291b1b..0000000000 --- a/crates/intrinsic-test/src/x86/compile.rs +++ /dev/null 
@@ -1,59 +0,0 @@ -use crate::common::cli::ProcessedCli; -use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; - -pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { - let cpp_compiler = config.cpp_compiler.as_ref()?; - - // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations - let mut command = CompilationCommandBuilder::new() - .add_arch_flags(["icelake-client"]) - .set_compiler(cpp_compiler) - .set_target(&config.target) - .set_opt_level("2") - .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) - .set_project_root("c_programs") - .add_extra_flags(vec![ - "-ffp-contract=off", - "-Wno-narrowing", - "-mavx", - "-mavx2", - "-mavx512f", - "-msse2", - "-mavx512vl", - "-mavx512bw", - "-mavx512dq", - "-mavx512cd", - "-mavx512fp16", - "-msha512", - "-msm3", - "-msm4", - "-mavxvnni", - "-mavxvnniint8", - "-mavxneconvert", - "-mavxifma", - "-mavxvnniint16", - "-mavx512bf16", - "-mavx512bitalg", - "-mavx512ifma", - "-mavx512vbmi", - "-mavx512vbmi2", - "-mavx512vnni", - "-mavx512vpopcntdq", - "-mavx512vp2intersect", - "-mbmi", - "-mbmi2", - "-mgfni", - "-mvaes", - "-mvpclmulqdq", - "-ferror-limit=1000", - "-std=c++23", - ]); - - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } - - let cpp_compiler = command.into_cpp_compilation(); - - Some(cpp_compiler) -} diff --git a/crates/intrinsic-test/src/x86/config.rs b/crates/intrinsic-test/src/x86/config.rs index 491dbb5147..68737ab5ac 100644 --- a/crates/intrinsic-test/src/x86/config.rs +++ b/crates/intrinsic-test/src/x86/config.rs @@ -3,7 +3,6 @@ pub const NOTICE: &str = "\ // test are derived from an XML specification, published under the same license as the // `intrinsic-test` crate.\n"; -// Format f16 values (and vectors containing them) in a way that is consistent with C. 
pub const PLATFORM_RUST_DEFINITIONS: &str = r#" use core_arch::arch::x86_64::*; @@ -129,206 +128,11 @@ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) } -#[inline] -fn debug_simd_finish( - formatter: &mut core::fmt::Formatter<'_>, - type_name: &str, - array: &[T; N], -) -> core::fmt::Result { - core::fmt::Formatter::debug_tuple_fields_finish( - formatter, - type_name, - &core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]), - ) -} - -trait DebugAs { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result; -} - -impl DebugAs for T { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{self}") - } -} - -macro_rules! impl_debug_as { - ($simd:ty, $name:expr, $bits:expr, [$($type:ty),+]) => { - $( - impl DebugAs<$type> for $simd { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - const ELEMENT_BITS: usize = core::mem::size_of::<$type>() * 8; - const NUM_ELEMENTS: usize = $bits / ELEMENT_BITS; - let array = unsafe { core::mem::transmute::<_, [$type; NUM_ELEMENTS]>(*self) }; - debug_simd_finish(f, $name, &array) - } - } - )+ - }; -} - -impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64, f16]); -impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]); -impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]); -impl_debug_as!(__m128h, "__m128h", 128, [f32]); -impl_debug_as!(__m256h, "__m256h", 256, [f32]); -impl_debug_as!(__m512h, "__m512h", 512, [f32]); - -fn debug_as(x: V) -> impl core::fmt::Debug -where V: DebugAs -{ - struct DebugWrapper(V, core::marker::PhantomData); - impl, T> core::fmt::Debug for DebugWrapper { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - self.0.fmt(f) - } - } - DebugWrapper(x, core::marker::PhantomData) -} - -"#; - -pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" -#ifndef 
X86_DECLARATIONS -#define X86_DECLARATIONS - typedef _Float16 float16_t; - typedef float float32_t; - typedef double float64_t; - - #define __int64 long long - #define __int32 int - - std::ostream& operator<<(std::ostream& os, _Float16 value); - std::ostream& operator<<(std::ostream& os, __m128i value); - std::ostream& operator<<(std::ostream& os, __m256i value); - std::ostream& operator<<(std::ostream& os, __m512i value); - std::ostream& operator<<(std::ostream& os, __mmask8 value); - - #define _mm512_extract_intrinsic_test_epi8(m, lane) \ - _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16) - - #define _mm512_extract_intrinsic_test_epi16(m, lane) \ - _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8) - - #define _mm512_extract_intrinsic_test_epi32(m, lane) \ - _mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4) - - #define _mm512_extract_intrinsic_test_epi64(m, lane) \ - _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2) - - // Load f16 (__m128h) and cast to integer (__m128i) - #define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr)) - #define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) - #define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) - - // Load f32 (__m128) and cast to f16 (__m128h) - #define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr)) - #define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) - #define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) - - // Load integer types and cast to double (__m128d, __m256d, __m512d) - #define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define 
_mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - // Load integer types and cast to float (__m128, __m256, __m512) - #define _mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - // T1 is the `To` type, T2 is the `From` type - template T1 cast(T2 x) { - if constexpr 
((std::is_integral_v && std::is_integral_v) || (std::is_floating_point_v && std::is_floating_point_v)) { - return x; - } else if constexpr (sizeof(T1) <= sizeof(T2)) { - T1 ret{}; - std::memcpy(&ret, &x, sizeof(T1)); - return ret; - } else { - static_assert(sizeof(T1) == sizeof(T2) || std::is_convertible_v, - "T2 must either be convertible to T1, or have the same size as T1!"); - return T1{}; - } - } -#endif -"#; -pub const PLATFORM_C_DEFINITIONS: &str = r#" - -std::ostream& operator<<(std::ostream& os, _Float16 value) { - os << static_cast(value); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m128i value) { - void* temp = malloc(sizeof(__m128i)); - _mm_storeu_si128((__m128i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 16; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m256i value) { - void* temp = malloc(sizeof(__m256i)); - _mm256_storeu_si256((__m256i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 32; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m512i value) { - void* temp = malloc(sizeof(__m512i)); - _mm512_storeu_si512((__m512i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 64; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __mmask8 value) { - os << static_cast(value); - return os; -} "#; pub const PLATFORM_RUST_CFGS: &str = r#" -#![cfg_attr(target_arch = "x86", feature(avx))] -#![cfg_attr(target_arch = "x86", feature(sse))] -#![cfg_attr(target_arch = "x86", feature(sse2))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))] -#![cfg_attr(target_arch = "x86", 
feature(stdarch_x86_avx512_f16))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] -#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))] -#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))] -#![feature(fmt_helpers_for_derive)] +#![feature(stdarch_x86_avx512_bf16)] +#![feature(stdarch_x86_avx512_f16)] +#![feature(stdarch_x86_rtm)] +#![feature(x86_amx_intrinsics)] "#; diff --git a/crates/intrinsic-test/src/x86/constraint.rs b/crates/intrinsic-test/src/x86/constraint.rs index 72f5da3b3f..608ffdd1ee 100644 --- a/crates/intrinsic-test/src/x86/constraint.rs +++ b/crates/intrinsic-test/src/x86/constraint.rs @@ -1,7 +1,10 @@ use crate::common::constraint::Constraint; -pub fn map_constraints(imm_type: &String, imm_width: u32) -> Option { +pub fn map_constraints(fn_name: &str, imm_type: &String, imm_width: u32) -> Option { if imm_width > 0 { + if fn_name == "_mm_sm3rnds2_epi32" { + return Some(Constraint::Set((0..64).step_by(2).collect())); + } let max: i64 = 2i64.pow(imm_width); return Some(Constraint::Range(0..max)); } diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index f2baf07071..5d4798482a 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -1,4 +1,3 @@ -mod compile; mod config; mod constraint; mod intrinsic; @@ -7,7 +6,6 @@ mod xml_parser; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; -use crate::common::compile_c::CppCompilation; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::X86IntrinsicType; @@ -15,33 +13,59 @@ use xml_parser::get_xml_intrinsics; pub struct X86ArchitectureTest { intrinsics: Vec>, - cli_options: ProcessedCli, } impl SupportedArchitectureTest for X86ArchitectureTest { type IntrinsicImpl = X86IntrinsicType; - fn cli_options(&self) -> &ProcessedCli { - &self.cli_options - 
} - fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } - fn cpp_compilation(&self) -> Option { - compile::build_cpp_compilation(&self.cli_options) - } - const NOTICE: &str = config::NOTICE; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"]; - const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; + const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; + fn arch_flags(&self) -> Vec<&str> { + vec![ + "-mavx", + "-mavx2", + "-mavx512f", + "-msse2", + "-mavx512vl", + "-mavx512bw", + "-mavx512dq", + "-mavx512cd", + "-mavx512fp16", + "-msha", + "-msha512", + "-msm3", + "-msm4", + "-mavxvnni", + "-mavxvnniint8", + "-mavxneconvert", + "-mavxifma", + "-mavxvnniint16", + "-mavx512bf16", + "-mavx512bitalg", + "-mavx512ifma", + "-mavx512vbmi", + "-mavx512vbmi2", + "-mavx512vnni", + "-mavx512vpopcntdq", + "-mavx512vp2intersect", + "-mbmi", + "-mbmi2", + "-mgfni", + "-mvaes", + "-mvpclmulqdq", + "-mlzcnt", + ] + } + fn create(cli_options: ProcessedCli) -> Self { let mut intrinsics = get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file"); @@ -67,9 +91,6 @@ impl SupportedArchitectureTest for X86ArchitectureTest { .take(sample_size) .collect::>(); - Self { - intrinsics: intrinsics, - cli_options: cli_options, - } + Self { intrinsics } } } diff --git a/crates/intrinsic-test/src/x86/types.rs b/crates/intrinsic-test/src/x86/types.rs index 2391ee9c2d..cd7c41e06f 100644 --- a/crates/intrinsic-test/src/x86/types.rs +++ b/crates/intrinsic-test/src/x86/types.rs @@ -1,11 +1,8 @@ use std::str::FromStr; use itertools::Itertools; -use regex::Regex; use super::intrinsic::X86IntrinsicType; -use crate::common::cli::Language; -use crate::common::indentation::Indentation; use 
crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; use crate::x86::xml_parser::Parameter; @@ -26,82 +23,16 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { .replace("const ", "") } - fn c_single_vector_type(&self) -> String { - // matches __m128, __m256 and similar types - let re = Regex::new(r"__m\d+").unwrap(); - if re.is_match(self.param.type_data.as_str()) { + fn rust_type(&self) -> String { + if self.is_simd() { self.param.type_data.clone() } else { - unreachable!("Shouldn't be called on this type") + format!("{}{}", self.kind.rust_prefix(), self.inner_size()) } } - // fn rust_type(&self) -> String { - // // handling edge cases first - // // the general handling is implemented below - // if let Some(val) = self.metadata.get("type") { - // match val.as_str() { - // "__m128 const *" => { - // return "&__m128".to_string(); - // } - // "__m128d const *" => { - // return "&__m128d".to_string(); - // } - // "const void*" => { - // return "&__m128d".to_string(); - // } - // _ => {} - // } - // } - - // if self.kind() == TypeKind::Void && self.ptr { - // // this has been handled by default settings in - // // the from_param function of X86IntrinsicType - // unreachable!() - // } - - // // general handling cases - // let core_part = if self.kind() == TypeKind::Mask { - // // all types of __mmask are handled here - // format!("__mask{}", self.bit_len.unwrap()) - // } else if self.simd_len.is_some() { - // // all types of __m vector types are handled here - // let re = Regex::new(r"\__m\d+[a-z]*").unwrap(); - // let rust_type = self - // .metadata - // .get("type") - // .map(|val| re.find(val).unwrap().as_str()); - // rust_type.unwrap().to_string() - // } else { - // format!( - // "{}{}", - // self.kind.rust_prefix().to_string(), - // self.bit_len.unwrap() - // ) - // }; - - // // extracting "memsize" so that even vector types can be involved - // let memwidth = self - // .metadata - // .get("memwidth") - // .map(|n| 
str::parse::(n).unwrap()); - // let prefix_part = if self.ptr && self.constant && self.bit_len.eq(&memwidth) { - // "&" - // } else if self.ptr && self.bit_len.eq(&memwidth) { - // "&mut " - // } else if self.ptr && self.constant { - // "*const " - // } else if self.ptr { - // "*mut " - // } else { - // "" - // }; - - // return prefix_part.to_string() + core_part.as_str(); - // } - /// Determines the load function for this type. - fn get_load_function(&self, _language: Language) -> String { + fn get_load_function(&self) -> String { let type_value = self.param.type_data.clone(); if type_value.len() == 0 { unimplemented!("the value for key 'type' is not present!"); @@ -168,82 +99,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { } } - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { - let lanes = if self.num_lanes() > 1 { - (0..self.num_lanes()) - .map(|idx| -> std::string::String { - let cast_type = self.c_promotion(); - let lane_fn = self.get_lane_function(); - if cast_type.len() > 2 { - format!("cast<{cast_type}>({lane_fn}(__return_value, {idx}))") - } else { - format!("{lane_fn}(__return_value, {idx})") - } - }) - .collect::>() - .join(r#" << ", " << "#) - } else { - format!( - "{promote}cast<{cast}>(__return_value)", - cast = match self.kind() { - TypeKind::Void => "void".to_string(), - TypeKind::Float if self.inner_size() == 64 => "double".to_string(), - TypeKind::Float if self.inner_size() == 32 => "float".to_string(), - TypeKind::Mask => format!( - "__mmask{}", - self.bit_len.expect(format!("self: {self:#?}").as_str()) - ), - TypeKind::Vector => format!( - "__m{}i", - self.bit_len.expect(format!("self: {self:#?}").as_str()) - ), - _ => self.c_scalar_type(), - }, - promote = 
self.generate_final_type_cast(), - ) - }; - - format!( - r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, - ty = if self.is_simd() { - format!("{}(", self.c_type()) - } else { - String::from("") - }, - close = if self.is_simd() { ")" } else { "" }, - ) - } - - /// Determines the get lane function for this type. - fn get_lane_function(&self) -> String { - let total_vector_bits: Option = self - .simd_len - .zip(self.bit_len) - .and_then(|(simd_len, bit_len)| Some(simd_len * bit_len)); - - match (self.bit_len, total_vector_bits) { - (Some(8), Some(128)) => String::from("(uint8_t)_mm_extract_epi8"), - (Some(16), Some(128)) => String::from("(uint16_t)_mm_extract_epi16"), - (Some(32), Some(128)) => String::from("(uint32_t)_mm_extract_epi32"), - (Some(64), Some(128)) => String::from("(uint64_t)_mm_extract_epi64"), - (Some(8), Some(256)) => String::from("(uint8_t)_mm256_extract_epi8"), - (Some(16), Some(256)) => String::from("(uint16_t)_mm256_extract_epi16"), - (Some(32), Some(256)) => String::from("(uint32_t)_mm256_extract_epi32"), - (Some(64), Some(256)) => String::from("(uint64_t)_mm256_extract_epi64"), - (Some(8), Some(512)) => String::from("(uint8_t)_mm512_extract_intrinsic_test_epi8"), - (Some(16), Some(512)) => String::from("(uint16_t)_mm512_extract_intrinsic_test_epi16"), - (Some(32), Some(512)) => String::from("(uint32_t)_mm512_extract_intrinsic_test_epi32"), - (Some(64), Some(512)) => String::from("(uint64_t)_mm512_extract_intrinsic_test_epi64"), - _ => unreachable!( - "invalid length for vector argument: {:?}, {:?}", - self.bit_len, self.simd_len - ), - } - } - fn rust_scalar_type(&self) -> String { let prefix = match self.data.kind { TypeKind::Mask => String::from("__mmask"), @@ -258,23 +113,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { }; format!("{prefix}{bits}") } - - fn print_result_rust(&self) -> String { - let return_value = match self.kind() 
{ - // `_mm{256}_cvtps_ph` has return type __m128i but contains f16 values - TypeKind::Float if self.param.type_data == "__m128i" => { - "format_args!(\"{:.150?}\", debug_as::<_, f16>(__return_value))".to_string() - } - TypeKind::Int(_) - if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) => - { - format!("debug_as::<_, u{}>(__return_value)", self.inner_size()) - } - _ => "format_args!(\"{__return_value:.150?}\")".to_string(), - }; - - return_value - } } impl X86IntrinsicType { diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index 681b1a3c52..6006d7919f 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -99,7 +99,7 @@ fn xml_to_intrinsic( } else { param.imm_width }; - let constraint = map_constraints(¶m.imm_type, effective_imm_width); + let constraint = map_constraints(&name, ¶m.imm_type, effective_imm_width); let arg = Argument::::new( i, param.var_name.clone(), diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 401bb504b3..e2ede548ec 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -1767,9 +1767,9 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h_u16_f16'] - ["f16", "u32", 'h_u32_f16'] - ["f16", "u64", 'h_u64_f16'] - compose: - LLVMLink: name: "vcvta{type[2]}" @@ -1789,6 +1789,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h_s16_f16'] - ["f16", "i32", 'h_s32_f16'] - ["f16", "i64", 'h_s64_f16'] compose: @@ -1799,37 +1800,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - - name: "vcvta{type[2]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to away" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: 
[assert_instr, [fcvtas]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h_s16_f16', 's32'] - compose: - - 'vcvtah_{type[3]}_f16(a) as i16' - - - name: "vcvta{type[2]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to away" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h_u16_f16', 'u32'] - compose: - - 'vcvtah_{type[3]}_f16(a) as u16' - - name: "vcvta{type[2]}" doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] @@ -1939,6 +1909,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -1949,22 +1920,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to even" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtnh_{type[3]}_f16(a) as i16' - - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" arguments: ["a: {type[0]}"] @@ -1976,6 +1931,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -1986,21 +1942,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to unsigned integer, rounding to nearest with 
ties to even" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtnh_{type[3]}_f16(a) as u16' - - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to signed integer, rounding toward minus infinity" arguments: ["a: {neon_type[0]}"] @@ -2291,6 +2232,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -2301,21 +2243,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding to plus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtph_{type[3]}_f16(a) as i16' - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding to plus infinity" arguments: ["a: {type[0]}"] @@ -2327,6 +2254,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -2337,21 +2265,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to unsigned integer, rounding to plus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtph_{type[3]}_f16(a) as u16' - - 
name: "vdup{neon_type.laneq_nox}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type}"] @@ -2361,6 +2274,7 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['1']] - *neon-stable static_defs: ['const N: i32'] + big_endian_inverse: true safety: safe types: - poly64x2_t @@ -2378,6 +2292,7 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['1']] - *neon-stable static_defs: ['const N: i32'] + big_endian_inverse: true safety: safe types: - [poly64x1_t, poly64x2_t] @@ -2569,6 +2484,7 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] - *neon-stable static_defs: ['const N: i32'] + big_endian_inverse: true safety: safe types: - poly64x2_t @@ -3595,7 +3511,6 @@ intrinsics: return_type: "{neon_type[1]}" attr: [*neon-stable] assert_instr: [ld2] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -3942,7 +3857,6 @@ intrinsics: arguments: ["a: {type[0]}"] return_type: "{neon_type[1]}" attr: [*neon-stable] - big_endian_inverse: false safety: unsafe: [neon] assert_instr: [ld3] @@ -4081,7 +3995,6 @@ intrinsics: return_type: "{neon_type[1]}" attr: [*neon-stable] assert_instr: [ld4] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4189,7 +4102,6 @@ intrinsics: - *neon-stable static_defs: - "const LANE: i32" - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4239,7 +4151,6 @@ intrinsics: - *neon-stable static_defs: - "const LANE: i32" - big_endian_inverse: false safety: unsafe: [neon] types: @@ -5797,6 +5708,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -5817,6 +5729,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -5839,6 +5752,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -5860,6 +5774,7 @@ intrinsics: - *target-not-arm64ec 
assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -5879,6 +5794,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -5901,6 +5817,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -5920,6 +5837,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -5942,6 +5860,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -5961,6 +5880,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -5984,6 +5904,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6003,6 +5924,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-fcma + - *cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6015,6 +5937,29 @@ intrinsics: - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].laneq_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] 
+ - [float32x4_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] - name: "vcmla{neon_type[0].laneq_nox}" doc: Floating-point complex multiply accumulate @@ -6049,6 +5994,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-fcma + - *cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6061,6 +6007,29 @@ intrinsics: - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - name: "vcmla{neon_type[0].rot90_laneq}" doc: Floating-point complex multiply accumulate @@ -6095,6 +6064,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - 
*neon-unstable-fcma + - *cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6107,6 +6077,29 @@ intrinsics: - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * (1 - LANE) as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - name: "vcmla{neon_type[0].rot90_lane}" doc: Floating-point complex multiply accumulate @@ -6141,6 +6134,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -6164,6 +6158,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6184,6 +6179,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-fcma + - *cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6196,6 +6192,29 @@ intrinsics: - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot180_laneq}" + 
doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - name: "vcmla{neon_type[0].rot180_laneq}" doc: Floating-point complex multiply accumulate @@ -6232,6 +6251,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-fcma + - *cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6244,6 +6264,29 @@ intrinsics: - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{type[3]}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]', '_rot180_lane_f32'] + - [float32x4_t, float32x2_t, '[2 * (1 - 
LANE) as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]', 'q_rot180_lane_f32'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - name: "vcmla{type[3]}" doc: Floating-point complex multiply accumulate @@ -6280,6 +6323,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-fcma + - *cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6292,6 +6336,29 @@ intrinsics: - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - name: "vcmla{neon_type[0].rot270_laneq}" doc: Floating-point complex multiply accumulate @@ -6326,6 +6393,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-fcma + - 
*cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6338,6 +6406,29 @@ intrinsics: - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].lane_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] - name: "vcmla{neon_type[0].lane_nox}" @@ -6373,6 +6464,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-fcma + - *cfg-little-endian static_defs: ["const LANE: i32"] safety: safe types: @@ -6382,6 +6474,26 @@ intrinsics: - FnCall: [static_assert!, ['LANE == 0']] - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + - *cfg-big-endian + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1, 2 * (1 - LANE) as u32, 2 * (1 - LANE) as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - name: "vcmla{neon_type[0].rot270_lane}" doc: Floating-point complex multiply accumulate @@ -6767,6 +6879,7 @@ intrinsics: attr: [*neon-stable] assert_instr: [faddp] safety: safe + big_endian_inverse: true types: - [float32x4_t, "4"] - [float64x2_t, "2"] @@ -6789,6 +6902,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [faddp] safety: safe + big_endian_inverse: true types: - [float16x8_t, "8"] compose: @@ -6810,6 +6924,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmaxp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6831,6 +6946,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmaxnmp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6852,6 +6968,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fminp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6873,6 +6990,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fminnmp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6912,6 +7030,7 @@ intrinsics: attr: [*neon-stable] assert_instr: [fminp] safety: safe + big_endian_inverse: true types: - ["s_f32", float32x2_t, f32] - ["qd_f64", float64x2_t, f64] @@ -7167,7 +7286,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]] + - FnCall: [cfg_attr, 
[*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal]]}]] - *neon-stable safety: safe types: @@ -7195,7 +7314,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-stable static_defs: ['const LANE: i32'] @@ -7281,7 +7400,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl]]}]] - *neon-stable safety: safe types: @@ -7309,7 +7428,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-stable static_defs: ['const LANE: i32'] @@ -8523,7 +8642,6 @@ intrinsics: - link: "llvm.aarch64.neon.frecpx.{type[1]}" arch: aarch64,arm64ec - - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -8540,54 +8658,65 @@ intrinsics: - [poly64x2_t, uint64x2_t] - [int64x2_t, poly64x2_t] - [uint64x2_t, poly64x2_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + - [float64x1_t, poly64x1_t] + - [float64x2_t, poly64x2_t] + - [int64x1_t, float64x1_t] + - [int64x2_t, float64x2_t] + - [uint64x1_t, float64x1_t] + - [uint64x2_t, float64x2_t] + - [poly64x1_t, float64x1_t] + - [poly64x2_t, float64x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: 
"vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: safe + types: - [float64x1_t, int8x8_t] - [float64x1_t, int16x4_t] - [float64x1_t, int32x2_t] - - [float64x1_t, int64x1_t] - [float64x2_t, int8x16_t] - [float64x2_t, int16x8_t] - [float64x2_t, int32x4_t] - - [float64x2_t, int64x2_t] - [float64x1_t, uint8x8_t] - [float64x1_t, uint16x4_t] - [float64x1_t, uint32x2_t] - - [float64x1_t, uint64x1_t] - [float64x2_t, uint8x16_t] - [float64x2_t, uint16x8_t] - [float64x2_t, uint32x4_t] - - [float64x2_t, uint64x2_t] - [float64x1_t, poly8x8_t] - [float64x1_t, poly16x4_t] - [float32x2_t, poly64x1_t] - - [float64x1_t, poly64x1_t] - [float64x2_t, poly8x16_t] - [float64x2_t, poly16x8_t] - [float32x4_t, poly64x2_t] - - [float64x2_t, poly64x2_t] - [float64x2_t, p128] - [int8x8_t, float64x1_t] - [int16x4_t, float64x1_t] - [int32x2_t, float64x1_t] - - [int64x1_t, float64x1_t] - [int8x16_t, float64x2_t] - [int16x8_t, float64x2_t] - [int32x4_t, float64x2_t] - - [int64x2_t, float64x2_t] - [poly8x8_t, float64x1_t] - [uint16x4_t, float64x1_t] - [uint32x2_t, float64x1_t] - - [uint64x1_t, float64x1_t] - [poly8x16_t, float64x2_t] - [uint16x8_t, float64x2_t] - [uint32x4_t, float64x2_t] - - [uint64x2_t, float64x2_t] - [uint8x8_t, float64x1_t] - [poly16x4_t, float64x1_t] - - [poly64x1_t, float64x1_t] - [poly64x1_t, float32x2_t] - [uint8x16_t, float64x2_t] - [poly16x8_t, float64x2_t] - - [poly64x2_t, float64x2_t] - [poly64x2_t, float32x4_t] - [p128, float64x2_t] - [float32x2_t, float64x1_t] @@ -8873,6 +9002,7 @@ intrinsics: - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe + big_endian_inverse: true types: - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1'] compose: @@ -8935,6 +9065,7 @@ intrinsics: - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe + big_endian_inverse: true types: - 
[poly64x2_t, poly64x1_t] compose: @@ -8983,6 +9114,7 @@ intrinsics: - *neon-stable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["f64", float64x1_t, float64x1_t] compose: @@ -8999,6 +9131,7 @@ intrinsics: - *neon-stable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["f64", float64x2_t, float64x2_t] compose: @@ -9112,6 +9245,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw1]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9130,6 +9264,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw2]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9148,6 +9283,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3ss1]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9166,6 +9302,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4ekey]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9184,6 +9321,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4e]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9220,6 +9358,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9238,6 +9377,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h2]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 
'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9256,6 +9396,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su0]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9274,6 +9415,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su1]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9294,6 +9436,7 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] static_defs: ["const IMM2: i32"] safety: safe + big_endian_inverse: true types: - ['1aq_u32', uint32x4_t, 'sm3tt1a', 'SM3TT1A'] - ['1bq_u32', uint32x4_t, 'sm3tt1b', 'SM3TT1B'] @@ -9538,6 +9681,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] - [int8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] @@ -9568,6 +9712,7 @@ intrinsics: - *target-not-arm64ec - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[0, 4, 2, 6]'] - [float16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] @@ -9582,6 +9727,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[0, 2]'] - [int64x2_t, '[0, 2]'] @@ -9601,6 +9747,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] - [int8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] @@ -9630,6 
+9777,7 @@ intrinsics: - *target-not-arm64ec - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[1, 5, 3, 7]'] - [float16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] @@ -9644,6 +9792,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[1, 3]'] - [int64x2_t, '[1, 3]'] @@ -9663,6 +9812,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] - [int8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] @@ -9699,6 +9849,7 @@ intrinsics: - *target-not-arm64ec - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[2, 6, 3, 7]'] - [float16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] @@ -9713,6 +9864,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] - [int8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] @@ -9750,6 +9902,7 @@ intrinsics: - *target-not-arm64ec - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[0, 4, 1, 5]'] - [float16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] @@ -9764,6 +9917,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[0, 2]'] - [int64x2_t, '[0, 2]'] @@ -9783,6 +9937,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, 
{FnCall: [assert_instr, [uzp1]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] - [int8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] @@ -9812,6 +9967,7 @@ intrinsics: - *target-not-arm64ec - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[0, 2, 4, 6]'] - [float16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] @@ -9826,6 +9982,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[1, 3]'] - [int64x2_t, '[1, 3]'] @@ -9845,6 +10002,7 @@ intrinsics: - *neon-stable - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] - [int8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] @@ -9878,6 +10036,7 @@ intrinsics: - *target-not-arm64ec - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[1, 3, 5, 7]'] - [float16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] @@ -10035,6 +10194,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - float32x2_t - float64x2_t @@ -11419,6 +11579,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - ['s_f32', float32x2_t, "f32"] - ['qd_f64', float64x2_t, "f64"] @@ -11437,6 +11598,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - ['s_f32', float32x2_t, "f32"] - ['qd_f64', float64x2_t, "f64"] @@ -11668,6 +11830,7 @@ intrinsics: - FnCall: 
[cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - float32x2_t - float64x2_t @@ -11774,6 +11937,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -11784,22 +11948,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding towards minus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtmh_{type[3]}_f16(a) as i16' - - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding towards minus infinity" arguments: ["a: {type[0]}"] @@ -11811,6 +11959,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -11821,21 +11970,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding towards minus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtmh_{type[3]}_f16(a) as u16' - - name: "vmlal_high_n_{neon_type[1]}" doc: "Multiply-add long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] @@ -12004,7 +12138,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [int8x8_t, 
'int8x8x4_t', 'int8x16x2', 'int8x8', 'i8x8::splat(32)'] @@ -12035,7 +12168,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [uint8x8_t, 'uint8x8x4_t', 'uint8x8_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(32)'] @@ -12068,7 +12200,6 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable safety: safe - big_endian_inverse: false types: - [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}'] - [uint8x8_t, 'uint8x8_t', 'b'] @@ -12089,7 +12220,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - ['int8x8x2_t', 'int8x8_t', 'int8x8_t'] @@ -12106,7 +12236,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - ['uint8x8x2_t', 'uint8x8_t', 'uint8x8_t'] @@ -12125,7 +12254,6 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable safety: safe - big_endian_inverse: false types: - ['int8x8x3_t', 'int8x8_t', 'int8x8_t', 'int8x16x2'] compose: @@ -12149,7 +12277,6 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable safety: safe - big_endian_inverse: false types: - [uint8x8x3_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2'] - [poly8x8x3_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2'] @@ -12173,7 +12300,6 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable safety: safe - big_endian_inverse: false types: - ['int8x8x4_t', 'int8x8_t', 'int8x8_t', 'int8x16x2'] compose: @@ -12196,7 +12322,6 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable safety: safe - big_endian_inverse: false types: - [uint8x8x4_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2'] - [poly8x8x4_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2'] @@ -12220,6 +12345,7 @@ 
intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x16_t, uint8x8_t, vqtbx1] - [int8x16_t, int8x16_t, uint8x16_t, vqtbx1q] @@ -12239,7 +12365,7 @@ intrinsics: - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"] - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"] - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"] - big_endian_inverse: false + big_endian_inverse: true compose: - FnCall: - transmute @@ -12256,7 +12382,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [int8x8_t, "int8x8_t", "int8x8", "i8x8::splat(8)"] @@ -12281,7 +12406,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [uint8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"] @@ -12307,7 +12431,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [int8x8_t, 'int8x8x2_t', 'int8x8', 'i8x8::splat(16)'] @@ -12332,7 +12455,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [uint8x8_t, 'uint8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)'] @@ -12358,7 +12480,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'int8x8', 'i8x8::splat(24)'] @@ -12389,7 +12510,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(24)'] @@ -12422,6 +12542,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] 
- *neon-stable safety: safe + big_endian_inverse: true types: - ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t'] - ['int8x16_t', uint8x16_t, 'vqtbl1q', 'int8x16_t'] @@ -12441,7 +12562,7 @@ intrinsics: - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t'] - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t'] - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t'] - big_endian_inverse: false + big_endian_inverse: true compose: - FnCall: - transmute @@ -12458,6 +12579,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t'] - ['int8x16x2_t', uint8x16_t, 'vqtbl2q', 'int8x16_t'] @@ -12471,7 +12593,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t'] @@ -12495,6 +12617,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2'] - [int8x16_t, 'int8x16x2_t', uint8x16_t, 'vqtbx2q'] @@ -12508,7 +12631,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2'] @@ -12532,7 +12655,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3'] @@ -12547,7 +12670,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3'] @@ -12572,6 +12695,7 @@ intrinsics: - FnCall: 
[cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3'] - [int8x16_t, 'int8x16x3_t', uint8x16_t, 'vqtbx3q'] @@ -12585,7 +12709,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3'] @@ -12610,7 +12734,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t'] @@ -12625,7 +12749,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t'] @@ -12651,6 +12775,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4'] - [int8x16_t, 'int8x16x4_t', uint8x16_t, 'vqtbx4q'] @@ -12664,7 +12789,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - *neon-stable - big_endian_inverse: false + big_endian_inverse: true safety: safe types: - [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4'] @@ -12729,7 +12854,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - ["vqtbl3", int8x16_t, uint8x8_t, int8x8_t] @@ -12749,7 +12873,6 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - *neon-stable - big_endian_inverse: false safety: safe types: - ["vqtbl4", int8x16_t, uint8x8_t, int8x8_t] @@ -13113,6 +13236,7 @@ intrinsics: - *neon-stable assert_instr: [addp] safety: safe + big_endian_inverse: true 
types: - [int8x16_t, "16"] - [int16x8_t, "8"] @@ -13401,6 +13525,7 @@ intrinsics: - *neon-stable assert_instr: ['sminp'] safety: safe + big_endian_inverse: true types: - int8x16_t - int16x8_t @@ -13420,6 +13545,7 @@ intrinsics: - *neon-stable assert_instr: ['uminp'] safety: safe + big_endian_inverse: true types: - uint8x16_t - uint16x8_t @@ -13439,6 +13565,7 @@ intrinsics: - *neon-stable assert_instr: ['fminp'] safety: safe + big_endian_inverse: true types: - float32x4_t - float64x2_t @@ -13457,6 +13584,7 @@ intrinsics: - *neon-stable assert_instr: ['smaxp'] safety: safe + big_endian_inverse: true types: - int8x16_t - int16x8_t @@ -13476,6 +13604,7 @@ intrinsics: - *neon-stable assert_instr: ['umaxp'] safety: safe + big_endian_inverse: true types: - uint8x16_t - uint16x8_t @@ -13495,6 +13624,7 @@ intrinsics: - *neon-stable assert_instr: ['fmaxp'] safety: safe + big_endian_inverse: true types: - float32x4_t - float64x2_t @@ -13649,6 +13779,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlal2] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_high_'] - [float32x4_t, float16x8_t, 'q_high_'] @@ -13697,6 +13828,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlal] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_low_'] - [float32x4_t, float16x8_t, 'q_low_'] @@ -13745,6 +13877,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlsl2] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_high_'] - [float32x4_t, float16x8_t, 'q_high_'] @@ -13792,6 +13925,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlsl] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_low_'] - [float32x4_t, float16x8_t, 'q_low_'] @@ -14253,8 +14387,9 @@ intrinsics: - *neon-stable assert_instr: ['{type[3]}'] safety: safe + big_endian_inverse: true types: - - ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'fmov', 'float64x1_t([simd_extract!(a, 1)])'] + - 
['vget_high_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 1)])'] - ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])'] compose: - Identifier: ['{type[4]}', UnsafeSymbol] @@ -14267,6 +14402,7 @@ intrinsics: - *neon-stable assert_instr: [mov] safety: safe + big_endian_inverse: true types: - [float64x1_t, float64x2_t, '[0, 1]'] compose: @@ -14281,6 +14417,7 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['1']] assert_instr: [['nop', 'IMM5 = 0']] safety: safe + big_endian_inverse: true static_defs: ["const IMM5: i32"] types: - ['float64x2_t', 'f64'] diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 1f7e1f6987..e8c5c3aac2 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -1442,6 +1442,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32; 8]'] - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32; 16]'] @@ -1466,6 +1467,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32; 16]'] - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32; 8]'] @@ -1490,6 +1492,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32; 4]'] - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32; 8]'] @@ -1514,6 +1517,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32; 8]'] - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32; 4]'] @@ -1541,6 +1545,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const N: i32'] 
safety: safe + big_endian_inverse: true types: - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32; 8]'] - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32; 4]'] @@ -1581,6 +1586,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32; 4]'] - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32; 8]'] @@ -1602,6 +1608,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_s32, int32x2_t, int32x2_t, '1', '[N as u32, N as u32]'] - [q_lane_s32, int32x2_t, int32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]'] @@ -1626,6 +1633,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] - [_laneq_s32, int32x4_t, int32x2_t, '2', '[N as u32, N as u32]'] @@ -1650,6 +1658,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s64, int64x2_t, '1', '[N as u32, N as u32]'] - [q_laneq_u64, uint64x2_t, '1', '[N as u32, N as u32]'] @@ -1670,6 +1679,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_lane_s64, int64x1_t, int64x2_t] - [q_lane_u64, uint64x1_t, uint64x2_t] @@ -1732,6 +1742,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - int8x8_t - int16x8_t @@ -1756,6 +1767,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - int8x16_t - uint8x16_t @@ -1777,6 +1789,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - int16x4_t - int32x4_t @@ -1804,6 +1817,7 @@ intrinsics: - 
*target-not-arm64ec static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - float16x4_t compose: @@ -1825,6 +1839,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - float16x8_t compose: @@ -1846,6 +1861,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - int32x2_t - uint32x2_t @@ -1867,6 +1883,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - int64x2_t - uint64x2_t @@ -2685,7 +2702,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -2744,7 +2760,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -2835,7 +2850,7 @@ intrinsics: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: [simd_insert!, [src, "LANE as u32", "*ptr"]] - - name: "vld1{neon_type[1].dup_nox}" + - name: "vld1{type[2]}_{neon_type[1]}" doc: "Load one single-element structure and replicate to all lanes of one register" arguments: ["ptr: {type[0]}"] return_type: "{neon_type[1]}" @@ -2849,11 +2864,11 @@ intrinsics: safety: unsafe: [neon] types: - - ["*const f16", float16x4_t, '_lane', 'f16x4'] - - ["*const f16", float16x8_t, 'q_laneq', 'f16x8'] + - ["*const f16", float16x4_t, '_dup', 'f16x4', "[0, 0, 0, 0]"] + - ["*const f16", float16x8_t, 'q_dup', 'f16x8', "[0, 0, 0, 0, 0, 0, 0, 0]"] compose: - - Let: [x, {FnCall: ["vld1{neon_type[1].lane_nox}", [ptr, {FnCall: [transmute, ["{type[3]}::splat(0.0)"]]}], [0]]}] - - FnCall: ['vdup{type[2]}_{neon_type[1]}', [x], [0]] + - Let: [x, "{neon_type[1]}", "vld1{neon_type[1].lane_nox}::<0>(ptr, 
transmute({type[3]}::splat(0.0)))"] + - FnCall: [simd_shuffle!, [x, x, "{type[4]}"]] - name: "vld2{neon_type[1].nox}" @@ -2982,7 +2997,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -3013,7 +3027,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -3110,7 +3123,6 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: - "const LANE: i32" - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4099,7 +4111,6 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4130,7 +4141,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4494,7 +4504,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4616,7 +4625,6 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -7482,6 +7490,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - float32x2_t compose: @@ -7506,6 +7515,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - float16x4_t compose: @@ -8477,32 +8487,46 @@ intrinsics: - *neon-cfg-arm-unstable safety: safe types: - - [poly64x1_t, int32x2_t] - - [poly64x1_t, uint32x2_t] - - [poly64x2_t, int32x4_t] - - 
[poly64x2_t, uint32x4_t] - [p128, int64x2_t] - [p128, uint64x2_t] - [p128, poly64x2_t] - - [poly8x16_t, p128] - [p128, int8x16_t] - [p128, uint8x16_t] - [p128, poly8x16_t] + - [poly64x2_t, p128] + - [p128, int32x4_t] + - [p128, uint32x4_t] + - [p128, int16x8_t] + - [p128, uint16x8_t] + - [p128, poly16x8_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [poly64x1_t, int32x2_t] + - [poly64x1_t, uint32x2_t] + - [poly8x16_t, p128] - [int32x2_t, poly64x1_t] - [uint32x2_t, poly64x1_t] - [int32x4_t, poly64x2_t] - [uint32x4_t, poly64x2_t] - [int64x2_t, p128] - [uint64x2_t, p128] - - [poly64x2_t, p128] - [poly64x1_t, int16x4_t] - [poly64x1_t, uint16x4_t] - [poly64x1_t, poly16x4_t] - - [poly64x2_t, int16x8_t] - - [poly64x2_t, uint16x8_t] - [poly64x2_t, poly16x8_t] - - [p128, int32x4_t] - - [p128, uint32x4_t] - [poly16x4_t, poly64x1_t] - [int16x4_t, poly64x1_t] - [uint16x4_t, poly64x1_t] @@ -8514,12 +8538,7 @@ intrinsics: - [poly64x1_t, int8x8_t] - [poly64x1_t, uint8x8_t] - [poly64x1_t, poly8x8_t] - - [poly64x2_t, int8x16_t] - - [poly64x2_t, uint8x16_t] - [poly64x2_t, poly8x16_t] - - [p128, int16x8_t] - - [p128, uint16x8_t] - - [p128, poly16x8_t] - [poly8x8_t, poly64x1_t] - [int8x8_t, poly64x1_t] - [uint8x8_t, poly64x1_t] @@ -8531,53 +8550,12 @@ intrinsics: - [poly16x8_t, p128] - [int8x16_t, p128] - [uint8x16_t, p128] - compose: - - FnCall: [transmute, [a]] - - - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" - doc: Vector reinterpret cast operation - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - *neon-v7 - - FnCall: [cfg_attr, 
[*test-is-arm, {FnCall: [assert_instr, [nop]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] - - *neon-not-arm-stable - - *neon-cfg-arm-unstable - safety: safe - types: - - [uint8x8_t, int8x8_t] - - [poly8x8_t, int8x8_t] - - [poly16x4_t, int16x4_t] - - [uint16x4_t, int16x4_t] - - [uint32x2_t, int32x2_t] - - [uint64x1_t, int64x1_t] - - [uint8x16_t, int8x16_t] - - [poly8x16_t, int8x16_t] - - [poly16x8_t, int16x8_t] - - [uint16x8_t, int16x8_t] - - [uint32x4_t, int32x4_t] - - [uint64x2_t, int64x2_t] - - [poly8x8_t, uint8x8_t] - - [int8x8_t, uint8x8_t] - - [poly16x4_t, uint16x4_t] - - [int16x4_t, uint16x4_t] - - [int32x2_t, uint32x2_t] - - [int64x1_t, uint64x1_t] - - [poly8x16_t, uint8x16_t] - - [int8x16_t, uint8x16_t] - - [poly16x8_t, uint16x8_t] - - [int16x8_t, uint16x8_t] - - [int32x4_t, uint32x4_t] - - [int64x2_t, uint64x2_t] - - [int8x8_t, poly8x8_t] - - [uint8x8_t, poly8x8_t] - - [int16x4_t, poly16x4_t] - - [uint16x4_t, poly16x4_t] - - [int8x16_t, poly8x16_t] - - [uint8x16_t, poly8x16_t] - - [int16x8_t, poly16x8_t] - - [uint16x8_t, poly16x8_t] + - [poly64x2_t, int32x4_t] + - [poly64x2_t, uint32x4_t] + - [poly64x2_t, int16x8_t] + - [poly64x2_t, uint16x8_t] + - [poly64x2_t, int8x16_t] + - [poly64x2_t, uint8x16_t] - [int16x4_t, int8x8_t] - [uint16x4_t, int8x8_t] - [poly16x4_t, int8x8_t] @@ -8728,19 +8706,15 @@ intrinsics: - [uint8x16_t, uint64x2_t] - [float32x2_t, int8x8_t] - [float32x2_t, int16x4_t] - - [float32x2_t, int32x2_t] - [float32x2_t, int64x1_t] - [float32x4_t, int8x16_t] - [float32x4_t, int16x8_t] - - [float32x4_t, int32x4_t] - [float32x4_t, int64x2_t] - [float32x2_t, uint8x8_t] - [float32x2_t, uint16x4_t] - - [float32x2_t, uint32x2_t] - [float32x2_t, uint64x1_t] - [float32x4_t, uint8x16_t] - [float32x4_t, uint16x8_t] - - [float32x4_t, uint32x4_t] - [float32x4_t, uint64x2_t] - [float32x2_t, poly8x8_t] - [float32x2_t, poly16x4_t] @@ -8749,19 +8723,15 @@ intrinsics: - [float32x4_t, p128] - [int8x8_t, float32x2_t] - 
[int16x4_t, float32x2_t] - - [int32x2_t, float32x2_t] - [int64x1_t, float32x2_t] - [int8x16_t, float32x4_t] - [int16x8_t, float32x4_t] - - [int32x4_t, float32x4_t] - [int64x2_t, float32x4_t] - [uint8x8_t, float32x2_t] - [uint16x4_t, float32x2_t] - - [uint32x2_t, float32x2_t] - [uint64x1_t, float32x2_t] - [uint8x16_t, float32x4_t] - [uint16x8_t, float32x4_t] - - [uint32x4_t, float32x4_t] - [uint64x2_t, float32x4_t] - [poly8x8_t, float32x2_t] - [poly16x4_t, float32x2_t] @@ -8771,6 +8741,60 @@ intrinsics: compose: - FnCall: [transmute, [a]] + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [poly8x8_t, int8x8_t] + - [poly16x4_t, int16x4_t] + - [uint16x4_t, int16x4_t] + - [uint32x2_t, int32x2_t] + - [uint64x1_t, int64x1_t] + - [uint8x16_t, int8x16_t] + - [poly8x16_t, int8x16_t] + - [poly16x8_t, int16x8_t] + - [uint16x8_t, int16x8_t] + - [uint32x4_t, int32x4_t] + - [uint64x2_t, int64x2_t] + - [poly8x8_t, uint8x8_t] + - [int8x8_t, uint8x8_t] + - [poly16x4_t, uint16x4_t] + - [int16x4_t, uint16x4_t] + - [int32x2_t, uint32x2_t] + - [int64x1_t, uint64x1_t] + - [poly8x16_t, uint8x16_t] + - [int8x16_t, uint8x16_t] + - [poly16x8_t, uint16x8_t] + - [int16x8_t, uint16x8_t] + - [int32x4_t, uint32x4_t] + - [int64x2_t, uint64x2_t] + - [int8x8_t, poly8x8_t] + - [uint8x8_t, poly8x8_t] + - [int16x4_t, poly16x4_t] + - [uint16x4_t, poly16x4_t] + - [int8x16_t, poly8x16_t] + - [uint8x16_t, poly8x16_t] + - [int16x8_t, poly16x8_t] + - [uint16x8_t, poly16x8_t] + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [int32x2_t, float32x2_t] + 
- [int32x4_t, float32x4_t] + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [transmute, [a]] - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation @@ -8787,54 +8811,73 @@ intrinsics: types: # non-q - [float32x2_t, float16x4_t] - - [poly16x4_t, float16x4_t] - [poly8x8_t, float16x4_t] - [int8x8_t, float16x4_t] - - [int16x4_t, float16x4_t] - [int32x2_t, float16x4_t] - [int64x1_t, float16x4_t] - [uint8x8_t, float16x4_t] - - [uint16x4_t, float16x4_t] - [uint32x2_t, float16x4_t] - [uint64x1_t, float16x4_t] - [float16x4_t, float32x2_t] - - [float16x4_t, poly16x4_t] - [float16x4_t, poly8x8_t] - [float16x4_t, int8x8_t] - - [float16x4_t, int16x4_t] - [float16x4_t, int32x2_t] - [float16x4_t, int64x1_t] - [float16x4_t, uint8x8_t] - - [float16x4_t, uint16x4_t] - [float16x4_t, uint32x2_t] - [float16x4_t, uint64x1_t] # q - [float32x4_t, float16x8_t] - - [poly16x8_t, float16x8_t] - [poly8x16_t, float16x8_t] - [int8x16_t, float16x8_t] - - [int16x8_t, float16x8_t] - [int32x4_t, float16x8_t] - [int64x2_t, float16x8_t] - [uint8x16_t, float16x8_t] - - [uint16x8_t, float16x8_t] - [uint32x4_t, float16x8_t] - [uint64x2_t, float16x8_t] - [float16x8_t, float32x4_t] - - [float16x8_t, poly16x8_t] - [float16x8_t, poly8x16_t] - [float16x8_t, int8x16_t] - - [float16x8_t, int16x8_t] - [float16x8_t, int32x4_t] - [float16x8_t, int64x2_t] - [float16x8_t, uint8x16_t] - - [float16x8_t, uint16x8_t] - [float16x8_t, uint32x4_t] - [float16x8_t, uint64x2_t] compose: - FnCall: [transmute, [a]] + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable-fp16 + - *neon-cfg-arm-unstable + - *target-not-arm64ec + safety: safe + types: + 
# non-q + - [poly16x4_t, float16x4_t] + - [int16x4_t, float16x4_t] + - [uint16x4_t, float16x4_t] + - [float16x4_t, poly16x4_t] + - [float16x4_t, int16x4_t] + - [float16x4_t, uint16x4_t] + # q + - [poly16x8_t, float16x8_t] + - [int16x8_t, float16x8_t] + - [uint16x8_t, float16x8_t] + - [float16x8_t, poly16x8_t] + - [float16x8_t, int16x8_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -8871,6 +8914,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - [float16x4_t, "[3, 2, 1, 0]"] - [float16x8_t, "[3, 2, 1, 0, 7, 6, 5, 4]"] @@ -9223,6 +9267,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["i8", int8x8_t, '3'] - ["i16", int16x4_t, '2'] @@ -9263,6 +9308,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["f16", float16x4_t, '2'] - ["f16", float16x8_t, '3'] @@ -9284,6 +9330,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["i64", int64x1_t, int64x1_t] - ["u64", uint64x1_t, uint64x1_t] @@ -9305,6 +9352,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["p64", poly64x1_t, poly64x1_t] compose: @@ -9325,6 +9373,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["p64", poly64x2_t, poly64x2_t] compose: @@ -9651,6 +9700,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] - [int16x4_t, int16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] @@ -9695,6 +9745,7 @@ 
intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] - [float16x8_t, float16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] @@ -9724,6 +9775,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] @@ -9753,6 +9805,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x16_t, int8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] - [int16x8_t, int16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] @@ -9788,6 +9841,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] @@ -9817,6 +9871,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] - [int16x4_t, int16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] @@ -9852,6 +9907,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] - [float16x8_t, float16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] @@ -9880,6 +9936,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] - [int16x4_t, int16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] @@ -9924,6 +9981,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec 
safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] - [float16x8_t, float16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] @@ -9953,6 +10011,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] @@ -12472,6 +12531,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [float32x2_t, float32x4_t, '[0, 1, 2, 3]'] - [poly8x8_t, poly8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] @@ -12499,6 +12559,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - uint8x16_t compose: @@ -12521,6 +12582,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - uint8x16_t compose: @@ -12543,6 +12605,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint8x16_t, "aesmc"] compose: @@ -12565,6 +12628,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint8x16_t, "aesimc"] compose: @@ -12587,6 +12651,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1h"] compose: @@ -12609,6 
+12674,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1c", "uint32x4_t"] compose: @@ -12631,6 +12697,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1m", "uint32x4_t"] compose: @@ -12653,6 +12720,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1p", "uint32x4_t"] compose: @@ -12675,6 +12743,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha1su0"] compose: @@ -12697,6 +12766,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha1su1"] compose: @@ -12719,6 +12789,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256h"] compose: @@ -12741,6 +12812,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256h2"] compose: @@ -12763,6 +12835,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = 
"aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256su0"] compose: @@ -12785,6 +12858,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256su1"] compose: @@ -13022,6 +13096,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - int8x8_t - int16x4_t @@ -13046,6 +13121,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - uint8x8_t - uint16x4_t @@ -13070,6 +13146,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - float32x2_t compose: @@ -13092,6 +13169,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - int8x8_t - int16x4_t @@ -13116,6 +13194,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - uint8x8_t - uint16x4_t @@ -13140,6 +13219,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - float32x2_t compose: @@ -13260,6 +13340,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - int8x8_t - int16x4_t @@ -13277,7 +13358,6 @@ intrinsics: doc: "Add pairwise." 
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[0]}" - big_endian_inverse: false attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] @@ -14111,6 +14191,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [nop] safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x8_t] compose: @@ -14128,6 +14209,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [nop] safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x8_t, 'low', "[0, 1, 2, 3]"] - [float16x4_t, float16x8_t, 'high', "[4, 5, 6, 7]"] @@ -14148,6 +14230,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - [float16x4_t, f16, '_lane_f16', '2'] - [float16x8_t, f16, 'q_lane_f16', '3'] @@ -14302,7 +14385,6 @@ intrinsics: doc: "Load one single-element structure and Replicate to all lanes (of one register)." arguments: ["ptr: {type[1]}"] return_type: "{neon_type[2]}" - big_endian_inverse: false attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] } ]] @@ -14900,6 +14982,7 @@ intrinsics: arguments: ["v: {neon_type[1]}"] return_type: "{type[2]}" safety: safe + big_endian_inverse: true static_defs: ['const IMM5: i32'] attr: - *neon-v7 @@ -14938,6 +15021,7 @@ intrinsics: arguments: ["v: {neon_type[1]}"] return_type: "{type[2]}" safety: safe + big_endian_inverse: true static_defs: ['const IMM5: i32'] attr: - *neon-v7 @@ -14984,6 +15068,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'int64x1_t([simd_extract!(a, 1)])'] - ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'uint64x1_t([simd_extract!(a, 1)])'] @@ -15001,6 +15086,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vget_low_s64', 'int64x2_t', 'int64x1_t', 
'int64x1_t([simd_extract!(a, 0)])'] - ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'uint64x1_t([simd_extract!(a, 0)])'] @@ -15019,6 +15105,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vget_high_s8', 'int8x16_t', 'int8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]'] - ['vget_high_u8', 'uint8x16_t', 'uint8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]'] @@ -15042,6 +15129,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vget_low_s8', 'int8x16_t', 'int8x8_t', '[0, 1, 2, 3, 4, 5, 6, 7]'] - ['vget_low_u8', 'uint8x16_t', 'uint8x8_t','[0, 1, 2, 3, 4, 5, 6, 7]'] @@ -15194,6 +15282,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vrev16_s8', 'int8x8_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6]'] - ['vrev16q_s8', 'int8x16_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]'] diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs index dc467fd307..72fb97fee1 100644 --- a/crates/stdarch-gen-arm/src/intrinsic.rs +++ b/crates/stdarch-gen-arm/src/intrinsic.rs @@ -1059,23 +1059,8 @@ impl Intrinsic { /// Add a big endian implementation fn generate_big_endian(&self, variant: &mut Intrinsic) { - /* We can't always blindly reverse the bits only in certain conditions - * do we need a different order - thus this allows us to have the - * ability to do so without having to play codegolf with the yaml AST */ - let should_reverse = { - if let Some(should_reverse) = variant.big_endian_inverse { - should_reverse - } else if variant.compose.len() == 1 { - match &variant.compose[0] { - Expression::FnCall(fn_call) => fn_call.0.to_string() == "transmute", - _ => false, - } - } else { - false - } - }; - - if !should_reverse { + // We only reverse if it was specifically requested + if 
!variant.big_endian_inverse.unwrap_or(false) { return; }