From aa94a08e6a2316ce2b4d7778ed04f2a1cf70890d Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 29 Oct 2023 23:44:54 +0000 Subject: [PATCH 01/22] Fixes for use in the standard library --- crates/core_arch/src/arm_shared/neon/mod.rs | 9 +++------ crates/std_detect/src/lib.rs | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 5b43549d34..2d12f5e99b 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -8707,8 +8707,7 @@ pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { /// 8-bit integer matrix multiply-accumulate #[inline] -#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smmla))] @@ -8735,8 +8734,7 @@ pub unsafe fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t /// 8-bit integer matrix multiply-accumulate #[inline] -#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ummla))] @@ -8763,8 +8761,7 @@ pub unsafe fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x /// Unsigned and signed 8-bit integer matrix multiply-accumulate #[inline] -#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usmmla))] diff --git a/crates/std_detect/src/lib.rs b/crates/std_detect/src/lib.rs index 9bdd647313..f13dca7cc6 100644 --- a/crates/std_detect/src/lib.rs +++ b/crates/std_detect/src/lib.rs @@ -24,6 +24,7 @@ // Remove this as soon as the stdarch submodule is updated on nightly. #![allow(stable_features)] #![feature(stdsimd)] +#![cfg_attr(feature = "rustc-dep-of-std", feature(stdarch_x86_has_cpuid))] #[cfg(test)] #[macro_use] From 10b0fcd88f53fee38d3d7c19a6a58cccb070d134 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 4 Jan 2024 11:25:38 +0000 Subject: [PATCH 02/22] Fix std build failure on non-x86 architectures This is more fallout from #1486 --- crates/std_detect/src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/std_detect/src/lib.rs b/crates/std_detect/src/lib.rs index f13dca7cc6..40c1aa4667 100644 --- a/crates/std_detect/src/lib.rs +++ b/crates/std_detect/src/lib.rs @@ -24,7 +24,13 @@ // Remove this as soon as the stdarch submodule is updated on nightly. 
#![allow(stable_features)] #![feature(stdsimd)] -#![cfg_attr(feature = "rustc-dep-of-std", feature(stdarch_x86_has_cpuid))] +#![cfg_attr( + all( + any(target_arch = "x86", target_arch = "x86_64"), + feature = "rustc-dep-of-std" + ), + feature(stdarch_x86_has_cpuid) +)] #[cfg(test)] #[macro_use] From 5ef6eb42bdcfef6891517a6e4c77a89c18722f18 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 4 Jan 2024 14:35:45 +0000 Subject: [PATCH 03/22] Fix std_detect not being an unstable crate More fallout from #1486 --- crates/core_arch/src/lib.rs | 2 +- crates/std_detect/src/lib.rs | 6 +++--- crates/std_detect/tests/macro_trailing_commas.rs | 12 ++++++++++++ crates/std_detect/tests/x86-specific.rs | 2 ++ examples/connect5.rs | 5 +++-- examples/hex.rs | 5 +++++ 6 files changed, 26 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index bd4de67445..5dcd11fb68 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -35,7 +35,7 @@ inline_const, generic_arg_infer )] -#![cfg_attr(test, feature(test, abi_vectorcall))] +#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))] #![deny(clippy::missing_inline_in_public_items)] #![allow( clippy::identity_op, diff --git a/crates/std_detect/src/lib.rs b/crates/std_detect/src/lib.rs index 40c1aa4667..19cc021712 100644 --- a/crates/std_detect/src/lib.rs +++ b/crates/std_detect/src/lib.rs @@ -13,7 +13,7 @@ //! * `powerpc64`: [`is_powerpc64_feature_detected`] //! * `loongarch`: [`is_loongarch_feature_detected`] -#![stable(feature = "stdsimd", since = "1.27.0")] +#![unstable(feature = "stdarch_internal", issue = "none")] #![feature(staged_api, doc_cfg, allow_internal_unstable)] #![deny(rust_2018_idioms)] #![allow(clippy::shadow_reuse)] @@ -23,7 +23,7 @@ // Temporary hack: needed to build against toolchains from before the mass feature renaming. // Remove this as soon as the stdarch submodule is updated on nightly. 
#![allow(stable_features)] -#![feature(stdsimd)] +#![cfg_attr(not(feature = "rustc-dep-of-std"), feature(stdsimd))] #![cfg_attr( all( any(target_arch = "x86", target_arch = "x86_64"), @@ -42,5 +42,5 @@ extern crate std; extern crate alloc; #[doc(hidden)] -#[stable(feature = "stdsimd", since = "1.27.0")] +#[unstable(feature = "stdarch_internal", issue = "none")] pub mod detect; diff --git a/crates/std_detect/tests/macro_trailing_commas.rs b/crates/std_detect/tests/macro_trailing_commas.rs index 8304b225f5..d37629ec0a 100644 --- a/crates/std_detect/tests/macro_trailing_commas.rs +++ b/crates/std_detect/tests/macro_trailing_commas.rs @@ -1,3 +1,15 @@ +#![allow(internal_features)] +#![cfg_attr( + any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" + ), + feature(stdarch_internal) +)] #![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] #![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))] #![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))] diff --git a/crates/std_detect/tests/x86-specific.rs b/crates/std_detect/tests/x86-specific.rs index 54bcab7b1e..1d8d8ba2ec 100644 --- a/crates/std_detect/tests/x86-specific.rs +++ b/crates/std_detect/tests/x86-specific.rs @@ -1,4 +1,6 @@ #![cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#![allow(internal_features)] +#![feature(stdarch_internal)] extern crate cupid; #[macro_use] diff --git a/examples/connect5.rs b/examples/connect5.rs index 53e9b8124d..a569689fad 100644 --- a/examples/connect5.rs +++ b/examples/connect5.rs @@ -28,9 +28,10 @@ //! You should see a game self-playing. In the end of the game, it shows the average time for //! each move. 
+#![allow(internal_features)] #![feature(avx512_target_feature)] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512))] -#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512))] +#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512, stdarch_internal))] +#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512, stdarch_internal))] #![feature(stmt_expr_attributes)] use rand::seq::SliceRandom; diff --git a/examples/hex.rs b/examples/hex.rs index 490556e8bf..b73a306f5f 100644 --- a/examples/hex.rs +++ b/examples/hex.rs @@ -12,8 +12,13 @@ //! //! and you should see `746573740a` get printed out. +#![allow(internal_features)] #![feature(wasm_target_feature)] #![cfg_attr(test, feature(test))] +#![cfg_attr( + any(target_arch = "x86", target_arch = "x86_64"), + feature(stdarch_internal) +)] #![allow( clippy::unwrap_used, clippy::print_stdout, From b01e4a060e0bd0ac81d83822ce7d36b865602bc5 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Fri, 5 Jan 2024 16:38:48 +0000 Subject: [PATCH 04/22] Add vec_xst --- crates/core_arch/src/powerpc/altivec.rs | 77 +++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 544fce89ab..410d41d420 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -606,6 +606,57 @@ mod sealed { impl_vec_xl! { vec_xl_u32 lxvd2x / lxv u32 } impl_vec_xl! { vec_xl_f32 lxvd2x / lxv f32 } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorXst { + type Out; + unsafe fn vec_xst(self, a: isize, p: Self::Out); + } + + macro_rules! 
impl_vec_xst { + ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, not(target_feature = "power9-altivec")), + assert_instr($notpwr9) + )] + #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] + pub unsafe fn $fun(s: t_t_l!($ty), a: isize, b: *mut $ty) { + let addr = (b as *mut u8).offset(a); + + // Workaround ptr::copy_nonoverlapping not being inlined + extern "rust-intrinsic" { + #[rustc_nounwind] + pub fn copy_nonoverlapping(src: *const T, dst: *mut T, count: usize); + } + + copy_nonoverlapping( + &s as *const _ as *const u8, + addr, + mem::size_of::(), + ); + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorXst for t_t_l!($ty) { + type Out = *mut $ty; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_xst(self, a: isize, b: Self::Out) { + $fun(self, a, b) + } + } + }; + } + + impl_vec_xst! { vec_xst_i8 stxvd2x / stxv i8 } + impl_vec_xst! { vec_xst_u8 stxvd2x / stxv u8 } + impl_vec_xst! { vec_xst_i16 stxvd2x / stxv i16 } + impl_vec_xst! { vec_xst_u16 stxvd2x / stxv u16 } + impl_vec_xst! { vec_xst_i32 stxvd2x / stxv i32 } + impl_vec_xst! { vec_xst_u32 stxvd2x / stxv u32 } + impl_vec_xst! { vec_xst_f32 stxvd2x / stxv f32 } + test_impl! { vec_floor(a: vector_float) -> vector_float [ vfloor, vrfim / xvrspim ] } test_impl! 
{ vec_vexptefp(a: vector_float) -> vector_float [ vexptefp, vexptefp ] } @@ -2692,6 +2743,17 @@ where p.vec_xl(off) } +/// VSX Unaligned Store +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xst(v: T, off: isize, p: ::Out) +where + T: sealed::VectorXst, +{ + v.vec_xst(off, p) +} + /// Vector Base-2 Logarithm Estimate #[inline] #[target_feature(enable = "altivec")] @@ -3579,6 +3641,21 @@ mod tests { } } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_xst() { + let v: vector_unsigned_char = transmute(u8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + )); + + for off in 0..16 { + let mut buf = [0u8; 32]; + vec_xst(v, 0, (buf.as_mut_ptr() as *mut u8).offset(off)); + for i in 0..16 { + assert_eq!(i as u8, buf[off as usize..][i]); + } + } + } + #[simd_test(enable = "altivec")] unsafe fn test_vec_ldl() { let pat = [ From f56215a0245c54f450b77193049493f880cbd1aa Mon Sep 17 00:00:00 2001 From: eupn <36292692+eupn@users.noreply.github.com> Date: Wed, 10 Jan 2024 16:44:04 +0400 Subject: [PATCH 05/22] Add missing ARM-v7A CRC intrinsics (#1515) * Move aarch64 crc into arm shared module * Add missing 32-bit arm crc intrinsics On 32-bit ARM, this intrinsic emits two instructions and splits its 64-bit input parameter between them. 
https://gcc.gnu.org/onlinedocs/gcc-4.9.4/gcc/ARM-ACLE-Intrinsics.html --- crates/core_arch/src/aarch64/crc.rs | 51 ------------------ crates/core_arch/src/aarch64/mod.rs | 4 -- crates/core_arch/src/arm_shared/crc.rs | 74 ++++++++++++++++++++++++++ crates/intrinsic-test/missing_arm.txt | 2 - 4 files changed, 74 insertions(+), 57 deletions(-) delete mode 100644 crates/core_arch/src/aarch64/crc.rs diff --git a/crates/core_arch/src/aarch64/crc.rs b/crates/core_arch/src/aarch64/crc.rs deleted file mode 100644 index 35940e0db8..0000000000 --- a/crates/core_arch/src/aarch64/crc.rs +++ /dev/null @@ -1,51 +0,0 @@ -extern "unadjusted" { - #[link_name = "llvm.aarch64.crc32x"] - fn crc32x_(crc: u32, data: u64) -> u32; - - #[link_name = "llvm.aarch64.crc32cx"] - fn crc32cx_(crc: u32, data: u64) -> u32; -} - -#[cfg(test)] -use stdarch_test::assert_instr; - -/// CRC32 single round checksum for quad words (64 bits). -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d) -#[inline] -#[target_feature(enable = "crc")] -#[cfg_attr(test, assert_instr(crc32x))] -#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] -pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 { - crc32x_(crc, data) -} - -/// CRC32-C single round checksum for quad words (64 bits). 
-/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd) -#[inline] -#[target_feature(enable = "crc")] -#[cfg_attr(test, assert_instr(crc32cx))] -#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] -pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { - crc32cx_(crc, data) -} - -#[cfg(test)] -mod tests { - use crate::core_arch::{aarch64::*, simd::*}; - use std::mem; - use stdarch_test::simd_test; - - #[simd_test(enable = "crc")] - unsafe fn test_crc32d() { - assert_eq!(__crc32d(0, 0), 0); - assert_eq!(__crc32d(0, 18446744073709551615), 1147535477); - } - - #[simd_test(enable = "crc")] - unsafe fn test_crc32cd() { - assert_eq!(__crc32cd(0, 0), 0); - assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501); - } -} diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index fefd2f4780..ebd7a31781 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -17,10 +17,6 @@ mod tme; #[unstable(feature = "stdarch_aarch64_tme", issue = "117216")] pub use self::tme::*; -mod crc; -#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] -pub use self::crc::*; - mod prefetch; #[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")] pub use self::prefetch::*; diff --git a/crates/core_arch/src/arm_shared/crc.rs b/crates/core_arch/src/arm_shared/crc.rs index b1f716e1aa..8eedd21696 100644 --- a/crates/core_arch/src/arm_shared/crc.rs +++ b/crates/core_arch/src/arm_shared/crc.rs @@ -18,6 +18,10 @@ extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cw")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn crc32cw_(crc: u32, data: u32) -> u32; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32x")] + fn crc32x_(crc: u32, data: u64) -> u32; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cx")] + fn crc32cx_(crc: u32, data: u64) -> u32; } 
#[cfg(test)] @@ -95,12 +99,82 @@ pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 { crc32cw_(crc, data) } +/// CRC32 single round checksum for quad words (64 bits). +/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "aarch64")] +#[cfg_attr(test, assert_instr(crc32x))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 { + crc32x_(crc, data) +} + +/// CRC32 single round checksum for quad words (64 bits). +/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(crc32w))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 { + // On 32-bit ARM this intrinsic emits a chain of two `crc32w` instructions, + // feeding the low and then the high 32 bits of `data`, in both clang and gcc + crc32w_( + crc32w_(crc, (data & 0xffffffff) as u32), + (data >> 32) as u32, + ) +} + +/// CRC32-C single round checksum for quad words (64 bits). +/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "aarch64")] +#[cfg_attr(test, assert_instr(crc32cx))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { + crc32cx_(crc, data) +} + +/// CRC32-C single round checksum for quad words (64 bits). 
+/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(crc32cw))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { + // On 32-bit ARM this intrinsic emits a chain of two `crc32cw` instructions, + // feeding the low and then the high 32 bits of `data`, in both clang and gcc + crc32cw_( + crc32cw_(crc, (data & 0xffffffff) as u32), + (data >> 32) as u32, + ) +} + #[cfg(test)] mod tests { use crate::core_arch::{arm_shared::*, simd::*}; use std::mem; use stdarch_test::simd_test; + #[simd_test(enable = "crc")] + unsafe fn test_crc32d() { + assert_eq!(__crc32d(0, 0), 0); + assert_eq!(__crc32d(0, 18446744073709551615), 1147535477); + } + + #[simd_test(enable = "crc")] + unsafe fn test_crc32cd() { + assert_eq!(__crc32cd(0, 0), 0); + assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501); + } + #[simd_test(enable = "crc")] unsafe fn test_crc32b() { assert_eq!(__crc32b(0, 0), 0); diff --git a/crates/intrinsic-test/missing_arm.txt b/crates/intrinsic-test/missing_arm.txt index 7439cd6e66..0ea4cec406 100644 --- a/crates/intrinsic-test/missing_arm.txt +++ b/crates/intrinsic-test/missing_arm.txt @@ -14,8 +14,6 @@ vbfmlaltq_laneq_f32 vbfmmlaq_f32 # Implemented in Clang and stdarch for A64 only even though CSV claims A32 support -__crc32d -__crc32cd vaddq_p64 vbsl_p64 vbslq_p64 From 07e276e3093a58ad3d9d5e369ed91ecce1784103 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 9 Jan 2024 00:17:33 +0900 Subject: [PATCH 06/22] Use latest version of actions/checkout action --- .github/workflows/main.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 014a9aca0f..56d11ae211 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: name: Check Style 
runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: ci/style.sh @@ -23,7 +23,7 @@ jobs: needs: [style] runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: ci/dox.sh @@ -43,7 +43,7 @@ jobs: needs: [style] runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml @@ -53,7 +53,7 @@ jobs: needs: [style] runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: RUST_STD_DETECT_UNSTABLE=avx cargo test --features=std_detect_env_override --manifest-path crates/std_detect/Cargo.toml env_override_no_avx @@ -164,7 +164,7 @@ jobs: os: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 with: submodules: recursive - name: Install Rust (rustup) @@ -221,7 +221,7 @@ jobs: name: Build std_detect runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: ./ci/build-std-detect.sh From 0fd054c6468753da21f3f35f039a4e0cc676640b Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 14 Jan 2024 12:37:13 +0000 Subject: [PATCH 07/22] Rename vec_splat_i* to the correct name --- crates/core_arch/src/powerpc/altivec.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 410d41d420..516cfc90fe 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -2963,11 
+2963,11 @@ where } splat! { vec_splat_u8, u8, u8x16 [vspltisb, "Vector Splat to Unsigned Byte"] } -splat! { vec_splat_i8, i8, i8x16 [vspltisb, "Vector Splat to Signed Byte"] } +splat! { vec_splat_s8, i8, i8x16 [vspltisb, "Vector Splat to Signed Byte"] } splat! { vec_splat_u16, u16, u16x8 [vspltish, "Vector Splat to Unsigned Halfword"] } -splat! { vec_splat_i16, i16, i16x8 [vspltish, "Vector Splat to Signed Halfword"] } +splat! { vec_splat_s16, i16, i16x8 [vspltish, "Vector Splat to Signed Halfword"] } splat! { vec_splat_u32, u32, u32x4 [vspltisw, "Vector Splat to Unsigned Word"] } -splat! { vec_splat_i32, i32, i32x4 [vspltisw, "Vector Splat to Signed Word"] } +splat! { vec_splat_s32, i32, i32x4 [vspltisw, "Vector Splat to Signed Word"] } /// Vector splats. #[inline] @@ -4846,9 +4846,9 @@ mod tests { test_vec_splat! { test_vec_splat_u8, vec_splat_u8, u8x16, -1, u8::MAX } test_vec_splat! { test_vec_splat_u16, vec_splat_u16, u16x8, -1, u16::MAX } test_vec_splat! { test_vec_splat_u32, vec_splat_u32, u32x4, -1, u32::MAX } - test_vec_splat! { test_vec_splat_i8, vec_splat_i8, i8x16, -1, -1 } - test_vec_splat! { test_vec_splat_i16, vec_splat_i16, i16x8, -1, -1 } - test_vec_splat! { test_vec_splat_i32, vec_splat_i32, i32x4, -1, -1 } + test_vec_splat! { test_vec_splat_s8, vec_splat_s8, i8x16, -1, -1 } + test_vec_splat! { test_vec_splat_s16, vec_splat_s16, i16x8, -1, -1 } + test_vec_splat! { test_vec_splat_s32, vec_splat_s32, i32x4, -1, -1 } macro_rules! test_vec_sub { { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { From 2659a324f006956af1da147b5b9969ef055c2a5e Mon Sep 17 00:00:00 2001 From: Makoto Kato Date: Wed, 11 Jan 2023 15:58:49 +0900 Subject: [PATCH 08/22] Add CPU detection for macOS/aarch64. 
--- crates/std_detect/src/detect/mod.rs | 3 + .../std_detect/src/detect/os/macos/aarch64.rs | 98 +++++++++++++++++++ crates/std_detect/tests/cpu-detection.rs | 32 ++++++ 3 files changed, 133 insertions(+) create mode 100644 crates/std_detect/src/detect/os/macos/aarch64.rs diff --git a/crates/std_detect/src/detect/mod.rs b/crates/std_detect/src/detect/mod.rs index 5ce4e54e23..c938abf17d 100644 --- a/crates/std_detect/src/detect/mod.rs +++ b/crates/std_detect/src/detect/mod.rs @@ -66,6 +66,9 @@ cfg_if! { } else if #[cfg(all(target_os = "windows", target_arch = "aarch64"))] { #[path = "os/windows/aarch64.rs"] mod os; + } else if #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "libc"))] { + #[path = "os/macos/aarch64.rs"] + mod os; } else { #[path = "os/other.rs"] mod os; diff --git a/crates/std_detect/src/detect/os/macos/aarch64.rs b/crates/std_detect/src/detect/os/macos/aarch64.rs new file mode 100644 index 0000000000..d7ebd956d6 --- /dev/null +++ b/crates/std_detect/src/detect/os/macos/aarch64.rs @@ -0,0 +1,98 @@ +//! Run-time feature detection for aarch64 on macOS. + +use crate::detect::{cache, Feature}; + +#[inline] +fn _sysctlbyname(name: &str) -> bool { + use libc; + + let mut enabled: i32 = 0; + let mut enabled_len: usize = 4; + let enabled_ptr = &mut enabled as *mut i32 as *mut libc::c_void; + + let ret = unsafe { + libc::sysctlbyname( + name.as_ptr() as *const i8, + enabled_ptr, + &mut enabled_len, + core::ptr::null_mut(), + 0, + ) + }; + + match ret { + 0 => enabled != 0, + _ => false, + } +} + +/// Try to read the features using sysctlbyname. 
+pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + let asimd = _sysctlbyname("hw.optional.AdvSIMD\0"); + let pmull = _sysctlbyname("hw.optional.arm.FEAT_PMULL\0"); + let fp = _sysctlbyname("hw.optional.floatingpoint\0"); + let fp16 = _sysctlbyname("hw.optional.arm.FEAT_FP16\0"); + let crc = _sysctlbyname("hw.optional.armv8_crc32\0"); + let lse = _sysctlbyname("hw.optional.arm.FEAT_LSE\0"); + let lse2 = _sysctlbyname("hw.optional.arm.FEAT_LSE2\0"); + let rdm = _sysctlbyname("hw.optional.arm.FEAT_RDM\0"); + let rcpc = _sysctlbyname("hw.optional.arm.FEAT_LRCPC\0"); + let rcpc2 = _sysctlbyname("hw.optional.arm.FEAT_LRCPC2\0"); + let dotprod = _sysctlbyname("hw.optional.arm.FEAT_DotProd\0"); + let fhm = _sysctlbyname("hw.optional.arm.FEAT_FHM\0"); + let flagm = _sysctlbyname("hw.optional.arm.FEAT_FlagM\0"); + let ssbs = _sysctlbyname("hw.optional.arm.FEAT_SSBS\0"); + let sb = _sysctlbyname("hw.optional.arm.FEAT_SB\0"); + let paca = _sysctlbyname("hw.optional.arm.FEAT_PAuth\0"); + let dpb = _sysctlbyname("hw.optional.arm.FEAT_DPB\0"); + let dpb2 = _sysctlbyname("hw.optional.arm.FEAT_DPB2\0"); + let frintts = _sysctlbyname("hw.optional.arm.FEAT_FRINTTS\0"); + let i8mm = _sysctlbyname("hw.optional.arm.FEAT_I8MM\0"); + let bf16 = _sysctlbyname("hw.optional.arm.FEAT_BF16\0"); + let bti = _sysctlbyname("hw.optional.arm.FEAT_BTI\0"); + let fcma = _sysctlbyname("hw.optional.arm.FEAT_FCMA\0"); + let aes = _sysctlbyname("hw.optional.arm.FEAT_AES\0"); + let sha1 = _sysctlbyname("hw.optional.arm.FEAT_SHA1\0"); + let sha2 = _sysctlbyname("hw.optional.arm.FEAT_SHA256\0"); + let sha3 = _sysctlbyname("hw.optional.arm.FEAT_SHA3\0"); + let sha512 = _sysctlbyname("hw.optional.arm.FEAT_SHA512\0"); + let jsconv = _sysctlbyname("hw.optional.arm.FEAT_JSCVT\0"); + + enable_feature(Feature::asimd, asimd); + enable_feature(Feature::pmull, pmull); + 
enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fp16); + enable_feature(Feature::crc, crc); + enable_feature(Feature::lse, lse); + enable_feature(Feature::lse2, lse2); + enable_feature(Feature::rdm, rdm); + enable_feature(Feature::rcpc, rcpc); + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::dotprod, dotprod); + enable_feature(Feature::fhm, fhm); + enable_feature(Feature::flagm, flagm); + enable_feature(Feature::ssbs, ssbs); + enable_feature(Feature::sb, sb); + enable_feature(Feature::paca, paca); + enable_feature(Feature::dpb, dpb); + enable_feature(Feature::dpb2, dpb2); + enable_feature(Feature::frintts, frintts); + enable_feature(Feature::i8mm, i8mm); + enable_feature(Feature::bf16, bf16); + enable_feature(Feature::bti, bti); + enable_feature(Feature::fcma, fcma); + enable_feature(Feature::aes, aes); + enable_feature(Feature::jsconv, jsconv); + enable_feature(Feature::sha2, sha1 && sha2 && asimd); + enable_feature(Feature::sha3, sha512 && sha3 && asimd); + + value +} diff --git a/crates/std_detect/tests/cpu-detection.rs b/crates/std_detect/tests/cpu-detection.rs index cb57b849d6..1ad897a2e2 100644 --- a/crates/std_detect/tests/cpu-detection.rs +++ b/crates/std_detect/tests/cpu-detection.rs @@ -139,6 +139,38 @@ fn aarch64_bsd() { println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); } +#[test] +#[cfg(all(target_arch = "aarch64", target_os = "macos"))] +fn aarch64_macos() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", 
is_aarch64_feature_detected!("rcpc")); + println!("rcpc2: {:?}", is_aarch64_feature_detected!("rcpc2")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("fhm: {:?}", is_aarch64_feature_detected!("fhm")); + println!("flagm: {:?}", is_aarch64_feature_detected!("flagm")); + println!("ssbs: {:?}", is_aarch64_feature_detected!("ssbs")); + println!("sb: {:?}", is_aarch64_feature_detected!("sb")); + println!("paca: {:?}", is_aarch64_feature_detected!("paca")); + println!("dpb: {:?}", is_aarch64_feature_detected!("dpb")); + println!("dpb2: {:?}", is_aarch64_feature_detected!("dpb2")); + println!("frintts: {:?}", is_aarch64_feature_detected!("frintts")); + println!("i8mm: {:?}", is_aarch64_feature_detected!("i8mm")); + println!("bf16: {:?}", is_aarch64_feature_detected!("bf16")); + println!("bti: {:?}", is_aarch64_feature_detected!("bti")); + println!("fcma: {:?}", is_aarch64_feature_detected!("fcma")); + println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {:?}", is_aarch64_feature_detected!("sha3")); +} + #[test] #[cfg(all(target_arch = "powerpc", target_os = "linux"))] fn powerpc_linux() { From 99b06835913400d5682305393fae4cf3b1b8c067 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 14 Jan 2024 14:56:58 +0000 Subject: [PATCH 09/22] Add vec_sl --- crates/core_arch/src/powerpc/altivec.rs | 76 +++++++++++++++++++++++++ crates/core_arch/src/simd_llvm.rs | 1 + 2 files changed, 77 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 516cfc90fe..b1c7354b2c 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -389,6 +389,32 @@ macro_rules! t_t_l { }; } +macro_rules! 
t_t_s { + (i32) => { + i32x4 + }; + (i16) => { + i16x8 + }; + (i8) => { + i8x16 + }; + + (u32) => { + u32x4 + }; + (u16) => { + u16x8 + }; + (u8) => { + u8x16 + }; + + (f32) => { + f32x4 + }; +} + macro_rules! impl_from { ($s: ident) => { #[unstable(feature = "stdarch_powerpc", issue = "111145")] @@ -2620,6 +2646,46 @@ mod sealed { impl_vec_trait! { [VectorUnpackl vec_unpackl]+ vec_vupklsb (vector_bool_char) -> vector_bool_short } impl_vec_trait! { [VectorUnpackl vec_unpackl] vec_vupklsh (vector_signed_short) -> vector_signed_int } impl_vec_trait! { [VectorUnpackl vec_unpackl]+ vec_vupklsh (vector_bool_short) -> vector_bool_int } + + macro_rules! impl_vec_shift { + ([$Trait:ident $m:ident] ($b:ident, $h:ident, $w:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_unsigned_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_unsigned_int) -> vector_signed_int } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSl { + type Result; + unsafe fn vec_sl(self, b: Other) -> Self::Result; + } + + macro_rules! impl_sl { + ($fun:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr($fun))] + unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + let a = transmute(a); + let b = simd_rem( + transmute(b), + ::splat(mem::size_of::<$ty>() as $ty * $ty::BITS as $ty), + ); + + transmute(simd_shl(a, b)) + } + }; + } + + impl_sl! { vslb u8 } + impl_sl! { vslh u16 } + impl_sl! 
{ vslw u32 } + + impl_vec_shift! { [VectorSl vec_sl] (vslb, vslh, vslw) } } /// Vector Merge Low @@ -2699,6 +2765,16 @@ where a.vec_unpackl() } +/// Vector Shift Left +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sl(a: T, b: U) -> >::Result +where + T: sealed::VectorSl, +{ + a.vec_sl(b) +} /// Vector Load Indexed. #[inline] #[target_feature(enable = "altivec")] diff --git a/crates/core_arch/src/simd_llvm.rs b/crates/core_arch/src/simd_llvm.rs index decdecaaf4..5b6cd0b93b 100644 --- a/crates/core_arch/src/simd_llvm.rs +++ b/crates/core_arch/src/simd_llvm.rs @@ -24,6 +24,7 @@ extern "platform-intrinsic" { pub fn simd_sub(x: T, y: T) -> T; pub fn simd_mul(x: T, y: T) -> T; pub fn simd_div(x: T, y: T) -> T; + pub fn simd_rem(x: T, y: T) -> T; pub fn simd_shl(x: T, y: T) -> T; pub fn simd_shr(x: T, y: T) -> T; pub fn simd_and(x: T, y: T) -> T; From f0244070d928c74a5de8fca5916e1f02d19b6a46 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 14 Jan 2024 15:42:13 +0000 Subject: [PATCH 10/22] Add vec_sld and vec_sldw --- crates/core_arch/src/powerpc/altivec.rs | 179 ++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index b1c7354b2c..d3579d3336 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -2686,6 +2686,142 @@ mod sealed { impl_sl! { vslw u32 } impl_vec_shift! 
{ [VectorSl vec_sl] (vslb, vslh, vslw) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSld { + unsafe fn vec_sld(self, b: Self) -> Self; + unsafe fn vec_sldw(self, b: Self) -> Self; + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsldoi, UIMM4 = 1))] + unsafe fn vsldoi( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + static_assert_uimm_bits!(UIMM4, 4); + let d = UIMM4 as u8; + if cfg!(target_endian = "little") { + let perm = u8x16::new( + 16 - d, + 17 - d, + 18 - d, + 19 - d, + 20 - d, + 21 - d, + 22 - d, + 23 - d, + 24 - d, + 25 - d, + 26 - d, + 27 - d, + 28 - d, + 29 - d, + 30 - d, + 31 - d, + ); + + vec_perm(b, a, transmute(perm)) + } else { + let perm = u8x16::new( + d, + d + 1, + d + 2, + d + 3, + d + 4, + d + 5, + d + 6, + d + 7, + d + 8, + d + 9, + d + 10, + d + 11, + d + 12, + d + 13, + d + 14, + d + 15, + ); + vec_perm(a, b, transmute(perm)) + } + } + + // TODO: collapse the two once generic_const_exprs are usable. + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(xxsldwi, UIMM2 = 1))] + unsafe fn xxsldwi( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + static_assert_uimm_bits!(UIMM2, 2); + let d = (UIMM2 << 2) as u8; + if cfg!(target_endian = "little") { + let perm = u8x16::new( + 16 - d, + 17 - d, + 18 - d, + 19 - d, + 20 - d, + 21 - d, + 22 - d, + 23 - d, + 24 - d, + 25 - d, + 26 - d, + 27 - d, + 28 - d, + 29 - d, + 30 - d, + 31 - d, + ); + + vec_perm(b, a, transmute(perm)) + } else { + let perm = u8x16::new( + d, + d + 1, + d + 2, + d + 3, + d + 4, + d + 5, + d + 6, + d + 7, + d + 8, + d + 9, + d + 10, + d + 11, + d + 12, + d + 13, + d + 14, + d + 15, + ); + vec_perm(a, b, transmute(perm)) + } + } + + macro_rules! 
impl_vec_sld { + ($($ty:ident),+) => { $( + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSld for $ty { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sld(self, b: Self) -> Self { + transmute(vsldoi::(transmute(self), transmute(b))) + } + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sldw(self, b: Self) -> Self { + transmute(xxsldwi::(transmute(self), transmute(b))) + } + } + )+ }; + } + + impl_vec_sld! { vector_bool_char, vector_signed_char, vector_unsigned_char } + impl_vec_sld! { vector_bool_short, vector_signed_short, vector_unsigned_short } + impl_vec_sld! { vector_bool_int, vector_signed_int, vector_unsigned_int } + impl_vec_sld! { vector_float } } /// Vector Merge Low @@ -2775,6 +2911,49 @@ where { a.vec_sl(b) } + +/// Vector Shift Left Double +/// +/// ## Endian considerations +/// +/// This intrinsic is not endian-neutral, so uses of vec_sld in +/// big-endian code must be rewritten for little-endian targets. +/// +/// Historically, vec_sld could be used to shift by amounts not a multiple of the element size +/// for most types, in which case the purpose of the shift is difficult to determine and difficult +/// to automatically rewrite efficiently for little endian. +/// +/// So the concatenation of a and b is done in big-endian fashion (left to right), and the shift is +/// always to the left. This will generally produce surprising results for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sld(a: T, b: T) -> T +where + T: sealed::VectorSld, +{ + a.vec_sld::(b) +} + +/// Vector Shift Left Double by Words +/// +/// ## Endian considerations +/// +/// This intrinsic is not endian-neutral, so uses of vec_sldw in +/// big-endian code must be rewritten for little-endian targets. 
+/// +/// The concatenation of a and b is done in big-endian fashion (left to right), and the shift is +/// always to the left. This will generally produce surprising results for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sldw(a: T, b: T) -> T +where + T: sealed::VectorSld, +{ + a.vec_sldw::(b) +} + /// Vector Load Indexed. #[inline] #[target_feature(enable = "altivec")] From 87b117874750078339a75c59c2a743d32ebe7312 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 14 Jan 2024 16:37:33 +0000 Subject: [PATCH 11/22] Add vec_sll --- crates/core_arch/src/powerpc/altivec.rs | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index d3579d3336..e4b8376810 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -335,6 +335,9 @@ extern "C" { #[link_name = "llvm.ppc.altivec.vlogefp"] fn vlogefp(a: vector_float) -> vector_float; + + #[link_name = "llvm.ppc.altivec.sll"] + fn vsl(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; } macro_rules! s_t_l { @@ -2822,6 +2825,25 @@ mod sealed { impl_vec_sld! { vector_bool_short, vector_signed_short, vector_unsigned_short } impl_vec_sld! { vector_bool_int, vector_signed_int, vector_unsigned_int } impl_vec_sld! { vector_float } + + macro_rules! 
impl_vec_sll { + ([$Trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_unsigned_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_unsigned_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_unsigned_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_unsigned_char) -> vector_signed_int } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSll { + type Result; + unsafe fn vec_sll(self, b: Other) -> Self::Result; + } + + impl_vec_sll! { [VectorSll vec_sll] (vsl) } } /// Vector Merge Low @@ -2954,6 +2976,21 @@ where a.vec_sldw::(b) } +/// Vector Shift Left Long +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_sll in big-endian +/// code must be rewritten for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sll(a: T, b: U) -> >::Result +where + T: sealed::VectorSll, +{ + a.vec_sll(b) +} + /// Vector Load Indexed. 
#[inline] #[target_feature(enable = "altivec")] From 29f61e677ad55ef3ebf9ddb322c26824e6f8982d Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 14 Jan 2024 16:45:19 +0000 Subject: [PATCH 12/22] Add vec_slo --- crates/core_arch/src/powerpc/altivec.rs | 45 +++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index e4b8376810..ce31b7d3ec 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -338,6 +338,8 @@ extern "C" { #[link_name = "llvm.ppc.altivec.sll"] fn vsl(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.slo"] + fn vslo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; } macro_rules! s_t_l { @@ -2844,6 +2846,33 @@ mod sealed { } impl_vec_sll! { [VectorSll vec_sll] (vsl) } + + macro_rules! impl_vec_slo { + ([$Trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_signed_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_signed_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_signed_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_signed_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_signed_char) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_float, vector_signed_char) -> vector_float } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_unsigned_char) -> vector_unsigned_short } 
+ impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_unsigned_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_unsigned_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_unsigned_char) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_float, vector_unsigned_char) -> vector_float } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSlo { + type Result; + unsafe fn vec_slo(self, b: Other) -> Self::Result; + } + + impl_vec_slo! { [VectorSlo vec_slo] (vslo) } } /// Vector Merge Low @@ -2991,6 +3020,22 @@ where a.vec_sll(b) } +/// Vector Shift Left by Octets +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_slo in big-endian code must be rewritten +/// for little-endian targets. The shift count is in element 15 of b for big-endian, but in element +/// 0 of b for little-endian. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_slo(a: T, b: U) -> >::Result +where + T: sealed::VectorSlo, +{ + a.vec_slo(b) +} + /// Vector Load Indexed. #[inline] #[target_feature(enable = "altivec")] From 0c75799722ffa126d7ede8f6731dc029a9380a4f Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 09:53:37 +0000 Subject: [PATCH 13/22] Add vec_sr --- crates/core_arch/src/powerpc/altivec.rs | 47 ++++++++++++++++++------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index ce31b7d3ec..59bc59a980 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -2663,14 +2663,8 @@ mod sealed { }; } - #[unstable(feature = "stdarch_powerpc", issue = "111145")] - pub trait VectorSl { - type Result; - unsafe fn vec_sl(self, b: Other) -> Self::Result; - } - - macro_rules! 
impl_sl { - ($fun:ident $ty:ident) => { + macro_rules! impl_shift { + ($fun:ident $intr:ident $ty:ident) => { #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr($fun))] @@ -2681,17 +2675,35 @@ mod sealed { ::splat(mem::size_of::<$ty>() as $ty * $ty::BITS as $ty), ); - transmute(simd_shl(a, b)) + transmute($intr(a, b)) } }; } - impl_sl! { vslb u8 } - impl_sl! { vslh u16 } - impl_sl! { vslw u32 } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSl { + type Result; + unsafe fn vec_sl(self, b: Other) -> Self::Result; + } + + impl_shift! { vslb simd_shl u8 } + impl_shift! { vslh simd_shl u16 } + impl_shift! { vslw simd_shl u32 } impl_vec_shift! { [VectorSl vec_sl] (vslb, vslh, vslw) } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSr { + type Result; + unsafe fn vec_sr(self, b: Other) -> Self::Result; + } + + impl_shift! { vsrb simd_shr u8 } + impl_shift! { vsrh simd_shr u16 } + impl_shift! { vsrw simd_shr u32 } + + impl_vec_shift! 
{ [VectorSr vec_sr] (vsrb, vsrh, vsrw) } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorSld { unsafe fn vec_sld(self, b: Self) -> Self; @@ -2963,6 +2975,17 @@ where a.vec_sl(b) } +/// Vector Shift Right +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sr(a: T, b: U) -> >::Result +where + T: sealed::VectorSr, +{ + a.vec_sr(b) +} + /// Vector Shift Left Double /// /// ## Endian considerations From 32cb2d0482d28dec9a0d753c3a4bf082d0aa73ea Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 10:17:36 +0000 Subject: [PATCH 14/22] Add vec_sra --- crates/core_arch/src/powerpc/altivec.rs | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 59bc59a980..d36d0fe412 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -340,6 +340,13 @@ extern "C" { fn vsl(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.slo"] fn vslo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.srab"] + fn vsrab(a: vector_signed_char, b: vector_unsigned_char) -> vector_signed_char; + #[link_name = "llvm.ppc.altivec.srah"] + fn vsrah(a: vector_signed_short, b: vector_unsigned_short) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.sraw"] + fn vsraw(a: vector_signed_int, b: vector_unsigned_int) -> vector_signed_int; } macro_rules! s_t_l { @@ -2704,6 +2711,14 @@ mod sealed { impl_vec_shift! { [VectorSr vec_sr] (vsrb, vsrh, vsrw) } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSra { + type Result; + unsafe fn vec_sra(self, b: Other) -> Self::Result; + } + + impl_vec_shift! 
{ [VectorSra vec_sra] (vsrab, vsrah, vsraw) } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorSld { unsafe fn vec_sld(self, b: Self) -> Self; @@ -2986,6 +3001,17 @@ where a.vec_sr(b) } +/// Vector Shift Right Algebraic +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sra(a: T, b: U) -> >::Result +where + T: sealed::VectorSra, +{ + a.vec_sra(b) +} + /// Vector Shift Left Double /// /// ## Endian considerations From d51408673aff4c91103b2b92178ccf1895f176c1 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 10:40:06 +0000 Subject: [PATCH 15/22] Add vec_srl --- crates/core_arch/src/powerpc/altivec.rs | 30 +++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index d36d0fe412..044feeb69e 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -347,6 +347,9 @@ extern "C" { fn vsrah(a: vector_signed_short, b: vector_unsigned_short) -> vector_signed_short; #[link_name = "llvm.ppc.altivec.sraw"] fn vsraw(a: vector_signed_int, b: vector_unsigned_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.srl"] + fn vsr(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; } macro_rules! s_t_l { @@ -2855,7 +2858,7 @@ mod sealed { impl_vec_sld! { vector_bool_int, vector_signed_int, vector_unsigned_int } impl_vec_sld! { vector_float } - macro_rules! impl_vec_sll { + macro_rules! impl_vec_shift_long { ([$Trait:ident $m:ident] ($f:ident)) => { impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } @@ -2872,7 +2875,15 @@ mod sealed { unsafe fn vec_sll(self, b: Other) -> Self::Result; } - impl_vec_sll! 
{ [VectorSll vec_sll] (vsl) } + impl_vec_shift_long! { [VectorSll vec_sll] (vsl) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSrl { + type Result; + unsafe fn vec_srl(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! { [VectorSrl vec_srl] (vsr) } macro_rules! impl_vec_slo { ([$Trait:ident $m:ident] ($f:ident)) => { @@ -3069,6 +3080,21 @@ where a.vec_sll(b) } +/// Vector Shift Right Long +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_srl in big-endian +/// code must be rewritten for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_srl(a: T, b: U) -> >::Result +where + T: sealed::VectorSrl, +{ + a.vec_srl(b) +} + /// Vector Shift Left by Octets /// /// ## Endian considerations From e2959afe4c4a0d7ffda6aa6b09d0eb67c0a51bbf Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 10:46:03 +0000 Subject: [PATCH 16/22] Add vec_sro --- crates/core_arch/src/powerpc/altivec.rs | 30 +++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 044feeb69e..8ea2e014b4 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -350,6 +350,8 @@ extern "C" { #[link_name = "llvm.ppc.altivec.srl"] fn vsr(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.sro"] + fn vsro(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; } macro_rules! s_t_l { @@ -2885,7 +2887,7 @@ mod sealed { impl_vec_shift_long! { [VectorSrl vec_srl] (vsr) } - macro_rules! impl_vec_slo { + macro_rules! 
impl_vec_shift_octect { ([$Trait:ident $m:ident] ($f:ident)) => { impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_signed_char) -> vector_unsigned_char } impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_signed_char) -> vector_signed_char } @@ -2910,7 +2912,15 @@ mod sealed { unsafe fn vec_slo(self, b: Other) -> Self::Result; } - impl_vec_slo! { [VectorSlo vec_slo] (vslo) } + impl_vec_shift_octect! { [VectorSlo vec_slo] (vslo) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSro { + type Result; + unsafe fn vec_sro(self, b: Other) -> Self::Result; + } + + impl_vec_shift_octect! { [VectorSro vec_sro] (vsro) } } /// Vector Merge Low @@ -3111,6 +3121,22 @@ where a.vec_slo(b) } +/// Vector Shift Right by Octets +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_sro in big-endian code must be rewritten +/// for little-endian targets. The shift count is in element 15 of b for big-endian, but in element +/// 0 of b for little-endian. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sro(a: T, b: U) -> >::Result +where + T: sealed::VectorSro, +{ + a.vec_sro(b) +} + /// Vector Load Indexed. 
#[inline] #[target_feature(enable = "altivec")] From 9c794a5e295cd71ab0e22c55cd8e991d071ba715 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 13:02:27 +0000 Subject: [PATCH 17/22] Add vec_slv and vec_srv --- crates/core_arch/src/powerpc/altivec.rs | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 8ea2e014b4..bfda40c38f 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -352,6 +352,11 @@ extern "C" { fn vsr(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.sro"] fn vsro(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.slv"] + fn vslv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.srv"] + fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; } macro_rules! s_t_l { @@ -3137,6 +3142,42 @@ where a.vec_sro(b) } +/// Vector Shift Left Variable +/// +/// ## Result value +/// Let v be a 17-byte vector formed from a in bytes `[0:15]` and a zero byte in element 16. +/// Then each byte element i of r is determined as follows. The start bit sb is +/// obtained from bits 5:7 of byte element i of b. Then the contents of bits sb:sb+7 of the +/// halfword in byte elements i:i+1 of v are placed into byte element i of r. +/// +/// ## Endian considerations +/// All bit and byte element numbers are specified in big-endian order. This intrinsic is not +/// endian-neutral. 
+#[inline] +#[target_feature(enable = "power9-altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_slv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char { + vslv(a, b) +} + +/// Vector Shift Right Variable +/// +/// ## Result value +/// Let v be a 17-byte vector formed from a zero byte in element 0 and the elements of +/// a in bytes `[1:16]`. Then each byte element i of r is determined as follows. The start bit sb is +/// obtained from bits 5:7 of byte element i of b. Then the contents of bits (8 – sb):(15 – sb) of +/// the halfword in byte elements i:i+1 of v are placed into byte element i of r. +/// +/// ## Endian considerations +/// All bit and byte element numbers are specified in big-endian order. This intrinsic is not +/// endian-neutral. +#[inline] +#[target_feature(enable = "power9-altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_srv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char { + vsrv(a, b) +} + /// Vector Load Indexed. #[inline] #[target_feature(enable = "altivec")] From 39b6df946eb1b4cc8682fb1f6f13f9397c49d047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Wed, 24 Jan 2024 12:54:50 +0100 Subject: [PATCH 18/22] CI: add a success conclusion job --- .github/workflows/main.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 56d11ae211..e1c765e995 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -225,3 +225,20 @@ jobs: - name: Install Rust run: rustup update nightly && rustup default nightly - run: ./ci/build-std-detect.sh + + success: + needs: + - docs + - verify + - env_override + - test + - build-std-detect + runs-on: ubuntu-latest + # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency + # failed" as success. 
So we have to do some contortions to ensure the job fails if any of its + # dependencies fails. + if: always() # make sure this is never "skipped" + steps: + # Manually check the status of all dependencies. `if: failure()` does not work. + - name: check if any dependency failed + run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' From 77285f9bc6a53447cb0d93d1f702554c460370fb Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 15:37:12 +0000 Subject: [PATCH 19/22] Add vec_adde --- crates/core_arch/src/powerpc/altivec.rs | 43 +++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index bfda40c38f..28ebef5aa1 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -2244,6 +2244,33 @@ mod sealed { } } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAdde { + unsafe fn vec_adde(self, b: Self, c: Self) -> Self; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdde for vector_unsigned_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_adde(self, b: Self, c: Self) -> Self { + let mask: vector_unsigned_int = transmute(u32x4::new(1, 1, 1, 1)); + let carry = vec_and(c, mask); + vec_add(vec_add(self, b), carry) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdde for vector_signed_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_adde(self, b: Self, c: Self) -> Self { + let mask: vector_signed_int = transmute(i32x4::new(1, 1, 1, 1)); + let carry = vec_and(c, mask); + vec_add(vec_add(self, b), carry) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorMladd { type Result; @@ -3523,6 +3550,22 @@ where a.vec_add(b) } +/// Vector Add Extended +/// +/// ## Result value +/// The value of each element of r is produced by adding 
the corresponding elements of +/// a and b with a carry specified in the corresponding element of c (1 if there is a carry, 0 +/// otherwise). +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_adde(a: T, b: T, c: T) -> T +where + T: sealed::VectorAdde, +{ + a.vec_adde(b, c) +} + /// Vector Convert to Floating-Point #[inline] #[target_feature(enable = "altivec")] From fc34d9a616f85dce6a7e13b501f9bf5ad69c8831 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 17:30:05 +0000 Subject: [PATCH 20/22] Add the boolean types for vec_nor --- crates/core_arch/src/powerpc/altivec.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 28ebef5aa1..b28ef6e561 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -2346,9 +2346,6 @@ mod sealed { vector_vnor! { vec_vnorsb i8 } vector_vnor! { vec_vnorsh i16 } vector_vnor! { vec_vnorsw i32 } - vector_vnor! { vec_vnorub u8 } - vector_vnor! { vec_vnoruh u16 } - vector_vnor! { vec_vnoruw u32 } #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorNor { @@ -2356,7 +2353,7 @@ mod sealed { unsafe fn vec_nor(self, b: Other) -> Self::Result; } - impl_vec_trait! { [VectorNor vec_nor] 2 (vec_vnorub, vec_vnorsb, vec_vnoruh, vec_vnorsh, vec_vnoruw, vec_vnorsw) } + impl_vec_trait! 
{ [VectorNor vec_nor]+ 2b (vec_vnorsb, vec_vnorsh, vec_vnorsw) } #[inline] #[target_feature(enable = "altivec")] From 894fe28968243a6873c516a98278691174bb442c Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 21 Jan 2024 18:51:24 +0000 Subject: [PATCH 21/22] Add vec_cmpne --- crates/core_arch/src/powerpc/altivec.rs | 86 +++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index b28ef6e561..4fc166da7a 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -244,6 +244,13 @@ extern "C" { #[link_name = "llvm.ppc.altivec.vcmpequw"] fn vcmpequw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_bool_int; + #[link_name = "llvm.ppc.altivec.vcmpneb"] + fn vcmpneb(a: vector_signed_char, b: vector_signed_char) -> vector_bool_char; + #[link_name = "llvm.ppc.altivec.vcmpneh"] + fn vcmpneh(a: vector_signed_short, b: vector_signed_short) -> vector_bool_short; + #[link_name = "llvm.ppc.altivec.vcmpnew"] + fn vcmpnew(a: vector_signed_int, b: vector_signed_int) -> vector_bool_int; + #[link_name = "llvm.ppc.altivec.vcmpgefp"] fn vcmpgefp(a: vector_float, b: vector_float) -> vector_bool_int; @@ -739,6 +746,34 @@ mod sealed { impl_vec_cmp! { [VectorCmpEq vec_cmpeq] (vec_vcmpequb, vec_vcmpequh, vec_vcmpequw) } + macro_rules! impl_cmpne { + ($fun:ident ($ty:ident) -> $r:ident $([ $pwr9:ident ])? ) => { + #[inline] + #[target_feature(enable = "altivec")] + $( #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] )? + unsafe fn $fun(a: $ty, b: $ty) -> $r { + $( if cfg!(target_feature = "power9-altivec") { + transmute($pwr9(transmute(a), transmute(b))) + } else )? { + let zero = transmute(i32x4::new(0, 0, 0, 0)); + vec_nor(vec_cmpeq(a, b), zero) + } + } + }; + } + + impl_cmpne! { vec_vcmpneb(vector_signed_char) -> vector_bool_char [ vcmpneb ] } + impl_cmpne! 
{ vec_vcmpneh(vector_signed_short) -> vector_bool_short [ vcmpneh ] } + impl_cmpne! { vec_vcmpnew(vector_signed_int) -> vector_bool_int [ vcmpnew ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorCmpNe { + type Result; + unsafe fn vec_cmpne(self, b: Other) -> Self::Result; + } + + impl_vec_cmp! { [VectorCmpNe vec_cmpne] (vec_vcmpneb, vec_vcmpneh, vec_vcmpnew) } + test_impl! { vec_vcmpbfp(a: vector_float, b: vector_float) -> vector_signed_int [vcmpbfp, vcmpbfp] } #[inline] @@ -3331,6 +3366,21 @@ where a.vec_cmpeq(b) } +/// Vector Compare Not Equal +/// +/// ## Result value +/// For each element of r, the value of each bit is 1 if the corresponding elements +/// of a and b are not equal. Otherwise, the value of each bit is 0. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmpne(a: T, b: U) -> >::Result +where + T: sealed::VectorCmpNe, +{ + a.vec_cmpne(b) +} + /// Vector cmpb. #[inline] #[target_feature(enable = "altivec")] @@ -4317,6 +4367,42 @@ mod tests { [false, true, true, false] } + test_vec_2! { test_vec_cmpne_i8, vec_cmpne, i8x16 -> m8x16, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_u8, vec_cmpne, u8x16 -> m8x16, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_i16, vec_cmpne, i16x8 -> m16x8, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false] + } + + test_vec_2! 
{ test_vec_cmpne_u16, vec_cmpne, u16x8 -> m16x8, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_i32, vec_cmpne, i32x4 -> m32x4, + [1, -1, 0, 0], + [0, -1, 0, 1], + [true, false, false, true] + } + + test_vec_2! { test_vec_cmpne_u32, vec_cmpne, u32x4 -> m32x4, + [1, 255, 0, 0], + [0, 255, 0, 1], + [true, false, false, true] + } + test_vec_2! { test_vec_all_eq_i8_false, vec_all_eq, i8x16 -> bool, [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], From 387dddc1d07204a19dcf780ff5e2f68d601b9600 Mon Sep 17 00:00:00 2001 From: usamoi Date: Tue, 12 Dec 2023 17:04:15 +0800 Subject: [PATCH 22/22] feat: std_detect avx512fp16 Signed-off-by: usamoi --- crates/std_detect/src/detect/arch/x86.rs | 3 +++ crates/std_detect/src/detect/os/x86.rs | 14 ++++++++------ crates/std_detect/tests/cpu-detection.rs | 1 + crates/std_detect/tests/x86-specific.rs | 1 + 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/crates/std_detect/src/detect/arch/x86.rs b/crates/std_detect/src/detect/arch/x86.rs index 828ac5c38a..f4f45750ed 100644 --- a/crates/std_detect/src/detect/arch/x86.rs +++ b/crates/std_detect/src/detect/arch/x86.rs @@ -75,6 +75,7 @@ features! { /// * `"avx512bitalg"` /// * `"avx512bf16"` /// * `"avx512vp2intersect"` + /// * `"avx512fp16"` /// * `"f16c"` /// * `"fma"` /// * `"bmi1"` @@ -169,6 +170,8 @@ features! 
{ /// AVX-512 BF16 (BFLOAT16 instructions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vp2intersect: "avx512vp2intersect"; /// AVX-512 P2INTERSECT + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512fp16: "avx512fp16"; + /// AVX-512 FP16 (FLOAT16 instructions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma"; diff --git a/crates/std_detect/src/detect/os/x86.rs b/crates/std_detect/src/detect/os/x86.rs index d8dd84db49..4ff9ac5f13 100644 --- a/crates/std_detect/src/detect/os/x86.rs +++ b/crates/std_detect/src/detect/os/x86.rs @@ -69,12 +69,13 @@ pub(crate) fn detect_features() -> cache::Initializer { // EAX = 7, ECX = 0: Queries "Extended Features"; // Contains information about bmi,bmi2, and avx2 support. - let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7 { - let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; - (ebx, ecx) - } else { - (0, 0) // CPUID does not support "Extended Features" - }; + let (extended_features_ebx, extended_features_ecx, extended_features_edx) = + if max_basic_leaf >= 7 { + let CpuidResult { ebx, ecx, edx, .. 
} = unsafe { __cpuid(0x0000_0007_u32) }; + (ebx, ecx, edx) + } else { + (0, 0, 0) // CPUID does not support "Extended Features" + }; // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported // - EAX returns the max leaf value for extended information, that is, @@ -217,6 +218,7 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_ecx, 11, Feature::avx512vnni); enable(extended_features_ecx, 12, Feature::avx512bitalg); enable(extended_features_ecx, 14, Feature::avx512vpopcntdq); + enable(extended_features_edx, 23, Feature::avx512fp16); } } } diff --git a/crates/std_detect/tests/cpu-detection.rs b/crates/std_detect/tests/cpu-detection.rs index 1ad897a2e2..1053de3a82 100644 --- a/crates/std_detect/tests/cpu-detection.rs +++ b/crates/std_detect/tests/cpu-detection.rs @@ -233,6 +233,7 @@ fn x86_all() { "avx512vp2intersect {:?}", is_x86_feature_detected!("avx512vp2intersect") ); + println!("avx512fp16 {:?}", is_x86_feature_detected!("avx512fp16")); println!("f16c: {:?}", is_x86_feature_detected!("f16c")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("bmi1: {:?}", is_x86_feature_detected!("bmi1")); diff --git a/crates/std_detect/tests/x86-specific.rs b/crates/std_detect/tests/x86-specific.rs index 1d8d8ba2ec..ae7f677ed4 100644 --- a/crates/std_detect/tests/x86-specific.rs +++ b/crates/std_detect/tests/x86-specific.rs @@ -51,6 +51,7 @@ fn dump() { "avx512vp2intersect {:?}", is_x86_feature_detected!("avx512vp2intersect") ); + println!("avx512fp16 {:?}", is_x86_feature_detected!("avx512fp16")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("abm: {:?}", is_x86_feature_detected!("abm")); println!("bmi: {:?}", is_x86_feature_detected!("bmi1"));