diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 014a9aca0f..e1c765e995 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: name: Check Style runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: ci/style.sh @@ -23,7 +23,7 @@ jobs: needs: [style] runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: ci/dox.sh @@ -43,7 +43,7 @@ jobs: needs: [style] runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml @@ -53,7 +53,7 @@ jobs: needs: [style] runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: RUST_STD_DETECT_UNSTABLE=avx cargo test --features=std_detect_env_override --manifest-path crates/std_detect/Cargo.toml env_override_no_avx @@ -164,7 +164,7 @@ jobs: os: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 with: submodules: recursive - name: Install Rust (rustup) @@ -221,7 +221,24 @@ jobs: name: Build std_detect runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: ./ci/build-std-detect.sh + + success: + needs: + - docs + - verify + - env_override + - test + - build-std-detect + runs-on: ubuntu-latest + # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency + # failed" as success. 
So we have to do some contortions to ensure the job fails if any of its + # dependencies fails. + if: always() # make sure this is never "skipped" + steps: + # Manually check the status of all dependencies. `if: failure()` does not work. + - name: check if any dependency failed + run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' diff --git a/crates/core_arch/src/aarch64/crc.rs b/crates/core_arch/src/aarch64/crc.rs deleted file mode 100644 index 35940e0db8..0000000000 --- a/crates/core_arch/src/aarch64/crc.rs +++ /dev/null @@ -1,51 +0,0 @@ -extern "unadjusted" { - #[link_name = "llvm.aarch64.crc32x"] - fn crc32x_(crc: u32, data: u64) -> u32; - - #[link_name = "llvm.aarch64.crc32cx"] - fn crc32cx_(crc: u32, data: u64) -> u32; -} - -#[cfg(test)] -use stdarch_test::assert_instr; - -/// CRC32 single round checksum for quad words (64 bits). -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d) -#[inline] -#[target_feature(enable = "crc")] -#[cfg_attr(test, assert_instr(crc32x))] -#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] -pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 { - crc32x_(crc, data) -} - -/// CRC32-C single round checksum for quad words (64 bits). 
-/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd) -#[inline] -#[target_feature(enable = "crc")] -#[cfg_attr(test, assert_instr(crc32cx))] -#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] -pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { - crc32cx_(crc, data) -} - -#[cfg(test)] -mod tests { - use crate::core_arch::{aarch64::*, simd::*}; - use std::mem; - use stdarch_test::simd_test; - - #[simd_test(enable = "crc")] - unsafe fn test_crc32d() { - assert_eq!(__crc32d(0, 0), 0); - assert_eq!(__crc32d(0, 18446744073709551615), 1147535477); - } - - #[simd_test(enable = "crc")] - unsafe fn test_crc32cd() { - assert_eq!(__crc32cd(0, 0), 0); - assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501); - } -} diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index fefd2f4780..ebd7a31781 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -17,10 +17,6 @@ mod tme; #[unstable(feature = "stdarch_aarch64_tme", issue = "117216")] pub use self::tme::*; -mod crc; -#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] -pub use self::crc::*; - mod prefetch; #[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")] pub use self::prefetch::*; diff --git a/crates/core_arch/src/arm_shared/crc.rs b/crates/core_arch/src/arm_shared/crc.rs index b1f716e1aa..8eedd21696 100644 --- a/crates/core_arch/src/arm_shared/crc.rs +++ b/crates/core_arch/src/arm_shared/crc.rs @@ -18,6 +18,10 @@ extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cw")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn crc32cw_(crc: u32, data: u32) -> u32; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32x")] + fn crc32x_(crc: u32, data: u64) -> u32; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cx")] + fn crc32cx_(crc: u32, data: u64) -> u32; } 
#[cfg(test)] @@ -95,12 +99,82 @@ pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 { crc32cw_(crc, data) } +/// CRC32 single round checksum for quad words (64 bits). +/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "aarch64")] +#[cfg_attr(test, assert_instr(crc32x))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 { + crc32x_(crc, data) +} + +/// CRC32 single round checksum for quad words (64 bits). +/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(crc32w))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 { + // On 32-bit ARM this intrinsic emits a chain of two `crc32_w` instructions + // and truncates the data to 32 bits in both clang and gcc + crc32w_( + crc32w_(crc, (data & 0xffffffff) as u32), + (data >> 32) as u32, + ) +} + +/// CRC32-C single round checksum for quad words (64 bits). +/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "aarch64")] +#[cfg_attr(test, assert_instr(crc32cx))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { + crc32cx_(crc, data) +} + +/// CRC32-C single round checksum for quad words (64 bits). 
+/// +/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd) +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(crc32cw))] +#[unstable(feature = "stdarch_arm_crc32", issue = "117215")] +pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { + // On 32-bit ARM this intrinsic emits a chain of two `crc32_cw` instructions + // and truncates the data to 32 bits in both clang and gcc + crc32cw_( + crc32cw_(crc, (data & 0xffffffff) as u32), + (data >> 32) as u32, + ) +} + #[cfg(test)] mod tests { use crate::core_arch::{arm_shared::*, simd::*}; use std::mem; use stdarch_test::simd_test; + #[simd_test(enable = "crc")] + unsafe fn test_crc32d() { + assert_eq!(__crc32d(0, 0), 0); + assert_eq!(__crc32d(0, 18446744073709551615), 1147535477); + } + + #[simd_test(enable = "crc")] + unsafe fn test_crc32cd() { + assert_eq!(__crc32cd(0, 0), 0); + assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501); + } + #[simd_test(enable = "crc")] unsafe fn test_crc32b() { assert_eq!(__crc32b(0, 0), 0); diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 5b43549d34..2d12f5e99b 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -8707,8 +8707,7 @@ pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { /// 8-bit integer matrix multiply-accumulate #[inline] -#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smmla))] @@ -8735,8 +8734,7 @@ pub unsafe fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t /// 8-bit integer matrix multiply-accumulate #[inline] 
-#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ummla))] @@ -8763,8 +8761,7 @@ pub unsafe fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x /// Unsigned and signed 8-bit integer matrix multiply-accumulate #[inline] -#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usmmla))] diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index bd4de67445..5dcd11fb68 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -35,7 +35,7 @@ inline_const, generic_arg_infer )] -#![cfg_attr(test, feature(test, abi_vectorcall))] +#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))] #![deny(clippy::missing_inline_in_public_items)] #![allow( clippy::identity_op, diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 544fce89ab..4fc166da7a 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -244,6 +244,13 @@ extern "C" { #[link_name = "llvm.ppc.altivec.vcmpequw"] fn vcmpequw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_bool_int; + #[link_name = "llvm.ppc.altivec.vcmpneb"] + fn vcmpneb(a: vector_signed_char, b: vector_signed_char) -> vector_bool_char; + #[link_name = "llvm.ppc.altivec.vcmpneh"] + fn vcmpneh(a: vector_signed_short, b: vector_signed_short) -> vector_bool_short; + #[link_name = "llvm.ppc.altivec.vcmpnew"] + fn vcmpnew(a: vector_signed_int, b: 
vector_signed_int) -> vector_bool_int; + #[link_name = "llvm.ppc.altivec.vcmpgefp"] fn vcmpgefp(a: vector_float, b: vector_float) -> vector_bool_int; @@ -335,6 +342,28 @@ extern "C" { #[link_name = "llvm.ppc.altivec.vlogefp"] fn vlogefp(a: vector_float) -> vector_float; + + #[link_name = "llvm.ppc.altivec.sll"] + fn vsl(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.slo"] + fn vslo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.srab"] + fn vsrab(a: vector_signed_char, b: vector_unsigned_char) -> vector_signed_char; + #[link_name = "llvm.ppc.altivec.srah"] + fn vsrah(a: vector_signed_short, b: vector_unsigned_short) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.sraw"] + fn vsraw(a: vector_signed_int, b: vector_unsigned_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.srl"] + fn vsr(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.sro"] + fn vsro(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.slv"] + fn vslv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.srv"] + fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; } macro_rules! s_t_l { @@ -389,6 +418,32 @@ macro_rules! t_t_l { }; } +macro_rules! t_t_s { + (i32) => { + i32x4 + }; + (i16) => { + i16x8 + }; + (i8) => { + i8x16 + }; + + (u32) => { + u32x4 + }; + (u16) => { + u16x8 + }; + (u8) => { + u8x16 + }; + + (f32) => { + f32x4 + }; +} + macro_rules! impl_from { ($s: ident) => { #[unstable(feature = "stdarch_powerpc", issue = "111145")] @@ -606,6 +661,57 @@ mod sealed { impl_vec_xl! { vec_xl_u32 lxvd2x / lxv u32 } impl_vec_xl! 
{ vec_xl_f32 lxvd2x / lxv f32 } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorXst { + type Out; + unsafe fn vec_xst(self, a: isize, p: Self::Out); + } + + macro_rules! impl_vec_xst { + ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, not(target_feature = "power9-altivec")), + assert_instr($notpwr9) + )] + #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] + pub unsafe fn $fun(s: t_t_l!($ty), a: isize, b: *mut $ty) { + let addr = (b as *mut u8).offset(a); + + // Workaround ptr::copy_nonoverlapping not being inlined + extern "rust-intrinsic" { + #[rustc_nounwind] + pub fn copy_nonoverlapping(src: *const T, dst: *mut T, count: usize); + } + + copy_nonoverlapping( + &s as *const _ as *const u8, + addr, + mem::size_of::(), + ); + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorXst for t_t_l!($ty) { + type Out = *mut $ty; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_xst(self, a: isize, b: Self::Out) { + $fun(self, a, b) + } + } + }; + } + + impl_vec_xst! { vec_xst_i8 stxvd2x / stxv i8 } + impl_vec_xst! { vec_xst_u8 stxvd2x / stxv u8 } + impl_vec_xst! { vec_xst_i16 stxvd2x / stxv i16 } + impl_vec_xst! { vec_xst_u16 stxvd2x / stxv u16 } + impl_vec_xst! { vec_xst_i32 stxvd2x / stxv i32 } + impl_vec_xst! { vec_xst_u32 stxvd2x / stxv u32 } + impl_vec_xst! { vec_xst_f32 stxvd2x / stxv f32 } + test_impl! { vec_floor(a: vector_float) -> vector_float [ vfloor, vrfim / xvrspim ] } test_impl! { vec_vexptefp(a: vector_float) -> vector_float [ vexptefp, vexptefp ] } @@ -640,6 +746,34 @@ mod sealed { impl_vec_cmp! { [VectorCmpEq vec_cmpeq] (vec_vcmpequb, vec_vcmpequh, vec_vcmpequw) } + macro_rules! impl_cmpne { + ($fun:ident ($ty:ident) -> $r:ident $([ $pwr9:ident ])? 
) => { + #[inline] + #[target_feature(enable = "altivec")] + $( #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] )? + unsafe fn $fun(a: $ty, b: $ty) -> $r { + $( if cfg!(target_feature = "power9-altivec") { + transmute($pwr9(transmute(a), transmute(b))) + } else )? { + let zero = transmute(i32x4::new(0, 0, 0, 0)); + vec_nor(vec_cmpeq(a, b), zero) + } + } + }; + } + + impl_cmpne! { vec_vcmpneb(vector_signed_char) -> vector_bool_char [ vcmpneb ] } + impl_cmpne! { vec_vcmpneh(vector_signed_short) -> vector_bool_short [ vcmpneh ] } + impl_cmpne! { vec_vcmpnew(vector_signed_int) -> vector_bool_int [ vcmpnew ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorCmpNe { + type Result; + unsafe fn vec_cmpne(self, b: Other) -> Self::Result; + } + + impl_vec_cmp! { [VectorCmpNe vec_cmpne] (vec_vcmpneb, vec_vcmpneh, vec_vcmpnew) } + test_impl! { vec_vcmpbfp(a: vector_float, b: vector_float) -> vector_signed_int [vcmpbfp, vcmpbfp] } #[inline] @@ -2145,6 +2279,33 @@ mod sealed { } } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAdde { + unsafe fn vec_adde(self, b: Self, c: Self) -> Self; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdde for vector_unsigned_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_adde(self, b: Self, c: Self) -> Self { + let mask: vector_unsigned_int = transmute(u32x4::new(1, 1, 1, 1)); + let carry = vec_and(c, mask); + vec_add(vec_add(self, b), carry) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdde for vector_signed_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_adde(self, b: Self, c: Self) -> Self { + let mask: vector_signed_int = transmute(i32x4::new(1, 1, 1, 1)); + let carry = vec_and(c, mask); + vec_add(vec_add(self, b), carry) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorMladd { type Result; 
@@ -2220,9 +2381,6 @@ mod sealed { vector_vnor! { vec_vnorsb i8 } vector_vnor! { vec_vnorsh i16 } vector_vnor! { vec_vnorsw i32 } - vector_vnor! { vec_vnorub u8 } - vector_vnor! { vec_vnoruh u16 } - vector_vnor! { vec_vnoruw u32 } #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorNor { @@ -2230,7 +2388,7 @@ mod sealed { unsafe fn vec_nor(self, b: Other) -> Self::Result; } - impl_vec_trait! { [VectorNor vec_nor] 2 (vec_vnorub, vec_vnorsb, vec_vnoruh, vec_vnorsh, vec_vnoruw, vec_vnorsw) } + impl_vec_trait! { [VectorNor vec_nor]+ 2b (vec_vnorsb, vec_vnorsh, vec_vnorsw) } #[inline] #[target_feature(enable = "altivec")] @@ -2569,6 +2727,264 @@ mod sealed { impl_vec_trait! { [VectorUnpackl vec_unpackl]+ vec_vupklsb (vector_bool_char) -> vector_bool_short } impl_vec_trait! { [VectorUnpackl vec_unpackl] vec_vupklsh (vector_signed_short) -> vector_signed_int } impl_vec_trait! { [VectorUnpackl vec_unpackl]+ vec_vupklsh (vector_bool_short) -> vector_bool_int } + + macro_rules! impl_vec_shift { + ([$Trait:ident $m:ident] ($b:ident, $h:ident, $w:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_unsigned_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_unsigned_int) -> vector_signed_int } + }; + } + + macro_rules! 
impl_shift { + ($fun:ident $intr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr($fun))] + unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + let a = transmute(a); + let b = simd_rem( + transmute(b), + ::splat(mem::size_of::<$ty>() as $ty * $ty::BITS as $ty), + ); + + transmute($intr(a, b)) + } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSl { + type Result; + unsafe fn vec_sl(self, b: Other) -> Self::Result; + } + + impl_shift! { vslb simd_shl u8 } + impl_shift! { vslh simd_shl u16 } + impl_shift! { vslw simd_shl u32 } + + impl_vec_shift! { [VectorSl vec_sl] (vslb, vslh, vslw) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSr { + type Result; + unsafe fn vec_sr(self, b: Other) -> Self::Result; + } + + impl_shift! { vsrb simd_shr u8 } + impl_shift! { vsrh simd_shr u16 } + impl_shift! { vsrw simd_shr u32 } + + impl_vec_shift! { [VectorSr vec_sr] (vsrb, vsrh, vsrw) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSra { + type Result; + unsafe fn vec_sra(self, b: Other) -> Self::Result; + } + + impl_vec_shift! 
{ [VectorSra vec_sra] (vsrab, vsrah, vsraw) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSld { + unsafe fn vec_sld(self, b: Self) -> Self; + unsafe fn vec_sldw(self, b: Self) -> Self; + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsldoi, UIMM4 = 1))] + unsafe fn vsldoi( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + static_assert_uimm_bits!(UIMM4, 4); + let d = UIMM4 as u8; + if cfg!(target_endian = "little") { + let perm = u8x16::new( + 16 - d, + 17 - d, + 18 - d, + 19 - d, + 20 - d, + 21 - d, + 22 - d, + 23 - d, + 24 - d, + 25 - d, + 26 - d, + 27 - d, + 28 - d, + 29 - d, + 30 - d, + 31 - d, + ); + + vec_perm(b, a, transmute(perm)) + } else { + let perm = u8x16::new( + d, + d + 1, + d + 2, + d + 3, + d + 4, + d + 5, + d + 6, + d + 7, + d + 8, + d + 9, + d + 10, + d + 11, + d + 12, + d + 13, + d + 14, + d + 15, + ); + vec_perm(a, b, transmute(perm)) + } + } + + // TODO: collapse the two once generic_const_exprs are usable. + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(xxsldwi, UIMM2 = 1))] + unsafe fn xxsldwi( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + static_assert_uimm_bits!(UIMM2, 2); + let d = (UIMM2 << 2) as u8; + if cfg!(target_endian = "little") { + let perm = u8x16::new( + 16 - d, + 17 - d, + 18 - d, + 19 - d, + 20 - d, + 21 - d, + 22 - d, + 23 - d, + 24 - d, + 25 - d, + 26 - d, + 27 - d, + 28 - d, + 29 - d, + 30 - d, + 31 - d, + ); + + vec_perm(b, a, transmute(perm)) + } else { + let perm = u8x16::new( + d, + d + 1, + d + 2, + d + 3, + d + 4, + d + 5, + d + 6, + d + 7, + d + 8, + d + 9, + d + 10, + d + 11, + d + 12, + d + 13, + d + 14, + d + 15, + ); + vec_perm(a, b, transmute(perm)) + } + } + + macro_rules! 
impl_vec_sld { + ($($ty:ident),+) => { $( + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSld for $ty { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sld(self, b: Self) -> Self { + transmute(vsldoi::(transmute(self), transmute(b))) + } + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sldw(self, b: Self) -> Self { + transmute(xxsldwi::(transmute(self), transmute(b))) + } + } + )+ }; + } + + impl_vec_sld! { vector_bool_char, vector_signed_char, vector_unsigned_char } + impl_vec_sld! { vector_bool_short, vector_signed_short, vector_unsigned_short } + impl_vec_sld! { vector_bool_int, vector_signed_int, vector_unsigned_int } + impl_vec_sld! { vector_float } + + macro_rules! impl_vec_shift_long { + ([$Trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_unsigned_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_unsigned_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_unsigned_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_unsigned_char) -> vector_signed_int } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSll { + type Result; + unsafe fn vec_sll(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! { [VectorSll vec_sll] (vsl) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSrl { + type Result; + unsafe fn vec_srl(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! { [VectorSrl vec_srl] (vsr) } + + macro_rules! 
impl_vec_shift_octect { + ([$Trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_signed_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_signed_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_signed_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_signed_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_signed_char) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_float, vector_signed_char) -> vector_float } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_unsigned_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_unsigned_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_unsigned_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_unsigned_char) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_float, vector_unsigned_char) -> vector_float } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSlo { + type Result; + unsafe fn vec_slo(self, b: Other) -> Self::Result; + } + + impl_vec_shift_octect! { [VectorSlo vec_slo] (vslo) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSro { + type Result; + unsafe fn vec_sro(self, b: Other) -> Self::Result; + } + + impl_vec_shift_octect! 
{ [VectorSro vec_sro] (vsro) } } /// Vector Merge Low @@ -2648,6 +3064,179 @@ where a.vec_unpackl() } +/// Vector Shift Left +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sl(a: T, b: U) -> >::Result +where + T: sealed::VectorSl, +{ + a.vec_sl(b) +} + +/// Vector Shift Right +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sr(a: T, b: U) -> >::Result +where + T: sealed::VectorSr, +{ + a.vec_sr(b) +} + +/// Vector Shift Right Algebraic +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sra(a: T, b: U) -> >::Result +where + T: sealed::VectorSra, +{ + a.vec_sra(b) +} + +/// Vector Shift Left Double +/// +/// ## Endian considerations +/// +/// This intrinsic is not endian-neutral, so uses of vec_sld in +/// big-endian code must be rewritten for little-endian targets. +/// +/// Historically, vec_sld could be used to shift by amounts not a multiple of the element size +/// for most types, in which case the purpose of the shift is difficult to determine and difficult +/// to automatically rewrite efficiently for little endian. +/// +/// So the concatenation of a and b is done in big-endian fashion (left to right), and the shift is +/// always to the left. This will generally produce surprising results for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sld(a: T, b: T) -> T +where + T: sealed::VectorSld, +{ + a.vec_sld::(b) +} + +/// Vector Shift Left Double by Words +/// +/// ## Endian considerations +/// +/// This intrinsic is not endian-neutral, so uses of vec_sldw in +/// big-endian code must be rewritten for little-endian targets. 
+/// +/// The concatenation of a and b is done in big-endian fashion (left to right), and the shift is +/// always to the left. This will generally produce surprising results for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sldw(a: T, b: T) -> T +where + T: sealed::VectorSld, +{ + a.vec_sldw::(b) +} + +/// Vector Shift Left Long +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_sll in big-endian +/// code must be rewritten for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sll(a: T, b: U) -> >::Result +where + T: sealed::VectorSll, +{ + a.vec_sll(b) +} + +/// Vector Shift Right Long +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_srl in big-endian +/// code must be rewritten for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_srl(a: T, b: U) -> >::Result +where + T: sealed::VectorSrl, +{ + a.vec_srl(b) +} + +/// Vector Shift Left by Octets +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_slo in big-endian code must be rewritten +/// for little-endian targets. The shift count is in element 15 of b for big-endian, but in element +/// 0 of b for little-endian. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_slo(a: T, b: U) -> >::Result +where + T: sealed::VectorSlo, +{ + a.vec_slo(b) +} + +/// Vector Shift Right by Octets +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_sro in big-endian code must be rewritten +/// for little-endian targets. 
The shift count is in element 15 of b for big-endian, but in element +/// 0 of b for little-endian. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sro(a: T, b: U) -> >::Result +where + T: sealed::VectorSro, +{ + a.vec_sro(b) +} + +/// Vector Shift Left Variable +/// +/// ## Result value +/// Let v be a 17-byte vector formed from a in bytes `[0:15]` and a zero byte in element 16. +/// Then each byte element i of r is determined as follows. The start bit sb is +/// obtained from bits 5:7 of byte element i of b. Then the contents of bits sb:sb+7 of the +/// halfword in byte elements i:i+1 of v are placed into byte element i of r. +/// +/// ## Endian considerations +/// All bit and byte element numbers are specified in big-endian order. This intrinsic is not +/// endian-neutral. +#[inline] +#[target_feature(enable = "power9-altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_slv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char { + vslv(a, b) +} + +/// Vector Shift Right Variable +/// +/// ## Result value +/// Let v be a 17-byte vector formed from a zero byte in element 0 and the elements of +/// a in bytes `[1:16]`. Then each byte element i of r is determined as follows. The start bit sb is +/// obtained from bits 5:7 of byte element i of b. Then the contents of bits (8 – sb):(15 – sb) of +/// the halfword in byte elements i:i+1 of v are placed into byte element i of r. +/// +/// ## Endian considerations +/// All bit and byte element numbers are specified in big-endian order. This intrinsic is not +/// endian-neutral. +#[inline] +#[target_feature(enable = "power9-altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_srv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char { + vsrv(a, b) +} + /// Vector Load Indexed. 
#[inline] #[target_feature(enable = "altivec")] @@ -2692,6 +3281,17 @@ where p.vec_xl(off) } +/// VSX Unaligned Store +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xst(v: T, off: isize, p: ::Out) +where + T: sealed::VectorXst, +{ + v.vec_xst(off, p) +} + /// Vector Base-2 Logarithm Estimate #[inline] #[target_feature(enable = "altivec")] @@ -2766,6 +3366,21 @@ where a.vec_cmpeq(b) } +/// Vector Compare Not Equal +/// +/// ## Result value +/// For each element of r, the value of each bit is 1 if the corresponding elements +/// of a and b are not equal. Otherwise, the value of each bit is 0. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmpne(a: T, b: U) -> >::Result +where + T: sealed::VectorCmpNe, +{ + a.vec_cmpne(b) +} + /// Vector cmpb. #[inline] #[target_feature(enable = "altivec")] @@ -2901,11 +3516,11 @@ where } splat! { vec_splat_u8, u8, u8x16 [vspltisb, "Vector Splat to Unsigned Byte"] } -splat! { vec_splat_i8, i8, i8x16 [vspltisb, "Vector Splat to Signed Byte"] } +splat! { vec_splat_s8, i8, i8x16 [vspltisb, "Vector Splat to Signed Byte"] } splat! { vec_splat_u16, u16, u16x8 [vspltish, "Vector Splat to Unsigned Halfword"] } -splat! { vec_splat_i16, i16, i16x8 [vspltish, "Vector Splat to Signed Halfword"] } +splat! { vec_splat_s16, i16, i16x8 [vspltish, "Vector Splat to Signed Halfword"] } splat! { vec_splat_u32, u32, u32x4 [vspltisw, "Vector Splat to Unsigned Word"] } -splat! { vec_splat_i32, i32, i32x4 [vspltisw, "Vector Splat to Signed Word"] } +splat! { vec_splat_s32, i32, i32x4 [vspltisw, "Vector Splat to Signed Word"] } /// Vector splats. 
#[inline] @@ -2982,6 +3597,22 @@ where a.vec_add(b) } +/// Vector Add Extended +/// +/// ## Result value +/// The value of each element of r is produced by adding the corresponding elements of +/// a and b with a carry specified in the corresponding element of c (1 if there is a carry, 0 +/// otherwise). +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_adde(a: T, b: T, c: T) -> T +where + T: sealed::VectorAdde, +{ + a.vec_adde(b, c) +} + /// Vector Convert to Floating-Point #[inline] #[target_feature(enable = "altivec")] @@ -3579,6 +4210,21 @@ mod tests { } } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_xst() { + let v: vector_unsigned_char = transmute(u8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + )); + + for off in 0..16 { + let mut buf = [0u8; 32]; + vec_xst(v, 0, (buf.as_mut_ptr() as *mut u8).offset(off)); + for i in 0..16 { + assert_eq!(i as u8, buf[off as usize..][i]); + } + } + } + #[simd_test(enable = "altivec")] unsafe fn test_vec_ldl() { let pat = [ @@ -3721,6 +4367,42 @@ mod tests { [false, true, true, false] } + test_vec_2! { test_vec_cmpne_i8, vec_cmpne, i8x16 -> m8x16, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_u8, vec_cmpne, u8x16 -> m8x16, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_i16, vec_cmpne, i16x8 -> m16x8, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false] + } + + test_vec_2! 
{ test_vec_cmpne_u16, vec_cmpne, u16x8 -> m16x8, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_i32, vec_cmpne, i32x4 -> m32x4, + [1, -1, 0, 0], + [0, -1, 0, 1], + [true, false, false, true] + } + + test_vec_2! { test_vec_cmpne_u32, vec_cmpne, u32x4 -> m32x4, + [1, 255, 0, 0], + [0, 255, 0, 1], + [true, false, false, true] + } + test_vec_2! { test_vec_all_eq_i8_false, vec_all_eq, i8x16 -> bool, [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], @@ -4769,9 +5451,9 @@ mod tests { test_vec_splat! { test_vec_splat_u8, vec_splat_u8, u8x16, -1, u8::MAX } test_vec_splat! { test_vec_splat_u16, vec_splat_u16, u16x8, -1, u16::MAX } test_vec_splat! { test_vec_splat_u32, vec_splat_u32, u32x4, -1, u32::MAX } - test_vec_splat! { test_vec_splat_i8, vec_splat_i8, i8x16, -1, -1 } - test_vec_splat! { test_vec_splat_i16, vec_splat_i16, i16x8, -1, -1 } - test_vec_splat! { test_vec_splat_i32, vec_splat_i32, i32x4, -1, -1 } + test_vec_splat! { test_vec_splat_s8, vec_splat_s8, i8x16, -1, -1 } + test_vec_splat! { test_vec_splat_s16, vec_splat_s16, i16x8, -1, -1 } + test_vec_splat! { test_vec_splat_s32, vec_splat_s32, i32x4, -1, -1 } macro_rules! 
test_vec_sub { { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { diff --git a/crates/core_arch/src/simd_llvm.rs b/crates/core_arch/src/simd_llvm.rs index decdecaaf4..5b6cd0b93b 100644 --- a/crates/core_arch/src/simd_llvm.rs +++ b/crates/core_arch/src/simd_llvm.rs @@ -24,6 +24,7 @@ extern "platform-intrinsic" { pub fn simd_sub(x: T, y: T) -> T; pub fn simd_mul(x: T, y: T) -> T; pub fn simd_div(x: T, y: T) -> T; + pub fn simd_rem(x: T, y: T) -> T; pub fn simd_shl(x: T, y: T) -> T; pub fn simd_shr(x: T, y: T) -> T; pub fn simd_and(x: T, y: T) -> T; diff --git a/crates/intrinsic-test/missing_arm.txt b/crates/intrinsic-test/missing_arm.txt index 7439cd6e66..0ea4cec406 100644 --- a/crates/intrinsic-test/missing_arm.txt +++ b/crates/intrinsic-test/missing_arm.txt @@ -14,8 +14,6 @@ vbfmlaltq_laneq_f32 vbfmmlaq_f32 # Implemented in Clang and stdarch for A64 only even though CSV claims A32 support -__crc32d -__crc32cd vaddq_p64 vbsl_p64 vbslq_p64 diff --git a/crates/std_detect/src/detect/arch/x86.rs b/crates/std_detect/src/detect/arch/x86.rs index 828ac5c38a..f4f45750ed 100644 --- a/crates/std_detect/src/detect/arch/x86.rs +++ b/crates/std_detect/src/detect/arch/x86.rs @@ -75,6 +75,7 @@ features! { /// * `"avx512bitalg"` /// * `"avx512bf16"` /// * `"avx512vp2intersect"` + /// * `"avx512fp16"` /// * `"f16c"` /// * `"fma"` /// * `"bmi1"` @@ -169,6 +170,8 @@ features! 
{ /// AVX-512 BF16 (BFLOAT16 instructions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vp2intersect: "avx512vp2intersect"; /// AVX-512 P2INTERSECT + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512fp16: "avx512fp16"; + /// AVX-512 FP16 (FLOAT16 instructions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma"; diff --git a/crates/std_detect/src/detect/mod.rs b/crates/std_detect/src/detect/mod.rs index 5ce4e54e23..c938abf17d 100644 --- a/crates/std_detect/src/detect/mod.rs +++ b/crates/std_detect/src/detect/mod.rs @@ -66,6 +66,9 @@ cfg_if! { } else if #[cfg(all(target_os = "windows", target_arch = "aarch64"))] { #[path = "os/windows/aarch64.rs"] mod os; + } else if #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "libc"))] { + #[path = "os/macos/aarch64.rs"] + mod os; } else { #[path = "os/other.rs"] mod os; diff --git a/crates/std_detect/src/detect/os/macos/aarch64.rs b/crates/std_detect/src/detect/os/macos/aarch64.rs new file mode 100644 index 0000000000..d7ebd956d6 --- /dev/null +++ b/crates/std_detect/src/detect/os/macos/aarch64.rs @@ -0,0 +1,98 @@ +//! Run-time feature detection for aarch64 on macOS. + +use crate::detect::{cache, Feature}; + +#[inline] +fn _sysctlbyname(name: &str) -> bool { + use libc; + + let mut enabled: i32 = 0; + let mut enabled_len: usize = 4; + let enabled_ptr = &mut enabled as *mut i32 as *mut libc::c_void; + + let ret = unsafe { + libc::sysctlbyname( + name.as_ptr() as *const i8, + enabled_ptr, + &mut enabled_len, + core::ptr::null_mut(), + 0, + ) + }; + + match ret { + 0 => enabled != 0, + _ => false, + } +} + +/// Try to read the features using sysctlbyname. 
+pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + let asimd = _sysctlbyname("hw.optional.AdvSIMD\0"); + let pmull = _sysctlbyname("hw.optional.arm.FEAT_PMULL\0"); + let fp = _sysctlbyname("hw.optional.floatingpoint\0"); + let fp16 = _sysctlbyname("hw.optional.arm.FEAT_FP16\0"); + let crc = _sysctlbyname("hw.optional.armv8_crc32\0"); + let lse = _sysctlbyname("hw.optional.arm.FEAT_LSE\0"); + let lse2 = _sysctlbyname("hw.optional.arm.FEAT_LSE2\0"); + let rdm = _sysctlbyname("hw.optional.arm.FEAT_RDM\0"); + let rcpc = _sysctlbyname("hw.optional.arm.FEAT_LRCPC\0"); + let rcpc2 = _sysctlbyname("hw.optional.arm.FEAT_LRCPC2\0"); + let dotprod = _sysctlbyname("hw.optional.arm.FEAT_DotProd\0"); + let fhm = _sysctlbyname("hw.optional.arm.FEAT_FHM\0"); + let flagm = _sysctlbyname("hw.optional.arm.FEAT_FlagM\0"); + let ssbs = _sysctlbyname("hw.optional.arm.FEAT_SSBS\0"); + let sb = _sysctlbyname("hw.optional.arm.FEAT_SB\0"); + let paca = _sysctlbyname("hw.optional.arm.FEAT_PAuth\0"); + let dpb = _sysctlbyname("hw.optional.arm.FEAT_DPB\0"); + let dpb2 = _sysctlbyname("hw.optional.arm.FEAT_DPB2\0"); + let frintts = _sysctlbyname("hw.optional.arm.FEAT_FRINTTS\0"); + let i8mm = _sysctlbyname("hw.optional.arm.FEAT_I8MM\0"); + let bf16 = _sysctlbyname("hw.optional.arm.FEAT_BF16\0"); + let bti = _sysctlbyname("hw.optional.arm.FEAT_BTI\0"); + let fcma = _sysctlbyname("hw.optional.arm.FEAT_FCMA\0"); + let aes = _sysctlbyname("hw.optional.arm.FEAT_AES\0"); + let sha1 = _sysctlbyname("hw.optional.arm.FEAT_SHA1\0"); + let sha2 = _sysctlbyname("hw.optional.arm.FEAT_SHA256\0"); + let sha3 = _sysctlbyname("hw.optional.arm.FEAT_SHA3\0"); + let sha512 = _sysctlbyname("hw.optional.arm.FEAT_SHA512\0"); + let jsconv = _sysctlbyname("hw.optional.arm.FEAT_JSCVT\0"); + + enable_feature(Feature::asimd, asimd); + enable_feature(Feature::pmull, pmull); + 
enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fp16); + enable_feature(Feature::crc, crc); + enable_feature(Feature::lse, lse); + enable_feature(Feature::lse2, lse2); + enable_feature(Feature::rdm, rdm); + enable_feature(Feature::rcpc, rcpc); + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::dotprod, dotprod); + enable_feature(Feature::fhm, fhm); + enable_feature(Feature::flagm, flagm); + enable_feature(Feature::ssbs, ssbs); + enable_feature(Feature::sb, sb); + enable_feature(Feature::paca, paca); + enable_feature(Feature::dpb, dpb); + enable_feature(Feature::dpb2, dpb2); + enable_feature(Feature::frintts, frintts); + enable_feature(Feature::i8mm, i8mm); + enable_feature(Feature::bf16, bf16); + enable_feature(Feature::bti, bti); + enable_feature(Feature::fcma, fcma); + enable_feature(Feature::aes, aes); + enable_feature(Feature::jsconv, jsconv); + enable_feature(Feature::sha2, sha1 && sha2 && asimd); + enable_feature(Feature::sha3, sha512 && sha3 && asimd); + + value +} diff --git a/crates/std_detect/src/detect/os/x86.rs b/crates/std_detect/src/detect/os/x86.rs index d8dd84db49..4ff9ac5f13 100644 --- a/crates/std_detect/src/detect/os/x86.rs +++ b/crates/std_detect/src/detect/os/x86.rs @@ -69,12 +69,13 @@ pub(crate) fn detect_features() -> cache::Initializer { // EAX = 7, ECX = 0: Queries "Extended Features"; // Contains information about bmi,bmi2, and avx2 support. - let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7 { - let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; - (ebx, ecx) - } else { - (0, 0) // CPUID does not support "Extended Features" - }; + let (extended_features_ebx, extended_features_ecx, extended_features_edx) = + if max_basic_leaf >= 7 { + let CpuidResult { ebx, ecx, edx, .. 
} = unsafe { __cpuid(0x0000_0007_u32) }; + (ebx, ecx, edx) + } else { + (0, 0, 0) // CPUID does not support "Extended Features" + }; // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported // - EAX returns the max leaf value for extended information, that is, @@ -217,6 +218,7 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_ecx, 11, Feature::avx512vnni); enable(extended_features_ecx, 12, Feature::avx512bitalg); enable(extended_features_ecx, 14, Feature::avx512vpopcntdq); + enable(extended_features_edx, 23, Feature::avx512fp16); } } } diff --git a/crates/std_detect/src/lib.rs b/crates/std_detect/src/lib.rs index 9bdd647313..19cc021712 100644 --- a/crates/std_detect/src/lib.rs +++ b/crates/std_detect/src/lib.rs @@ -13,7 +13,7 @@ //! * `powerpc64`: [`is_powerpc64_feature_detected`] //! * `loongarch`: [`is_loongarch_feature_detected`] -#![stable(feature = "stdsimd", since = "1.27.0")] +#![unstable(feature = "stdarch_internal", issue = "none")] #![feature(staged_api, doc_cfg, allow_internal_unstable)] #![deny(rust_2018_idioms)] #![allow(clippy::shadow_reuse)] @@ -23,7 +23,14 @@ // Temporary hack: needed to build against toolchains from before the mass feature renaming. // Remove this as soon as the stdarch submodule is updated on nightly. 
#![allow(stable_features)] -#![feature(stdsimd)] +#![cfg_attr(not(feature = "rustc-dep-of-std"), feature(stdsimd))] +#![cfg_attr( + all( + any(target_arch = "x86", target_arch = "x86_64"), + feature = "rustc-dep-of-std" + ), + feature(stdarch_x86_has_cpuid) +)] #[cfg(test)] #[macro_use] @@ -35,5 +42,5 @@ extern crate std; extern crate alloc; #[doc(hidden)] -#[stable(feature = "stdsimd", since = "1.27.0")] +#[unstable(feature = "stdarch_internal", issue = "none")] pub mod detect; diff --git a/crates/std_detect/tests/cpu-detection.rs b/crates/std_detect/tests/cpu-detection.rs index cb57b849d6..1053de3a82 100644 --- a/crates/std_detect/tests/cpu-detection.rs +++ b/crates/std_detect/tests/cpu-detection.rs @@ -139,6 +139,38 @@ fn aarch64_bsd() { println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); } +#[test] +#[cfg(all(target_arch = "aarch64", target_os = "macos"))] +fn aarch64_macos() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("rcpc2: {:?}", is_aarch64_feature_detected!("rcpc2")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("fhm: {:?}", is_aarch64_feature_detected!("fhm")); + println!("flagm: {:?}", is_aarch64_feature_detected!("flagm")); + println!("ssbs: {:?}", is_aarch64_feature_detected!("ssbs")); + println!("sb: {:?}", is_aarch64_feature_detected!("sb")); + println!("paca: {:?}", is_aarch64_feature_detected!("paca")); + println!("dpb: {:?}", is_aarch64_feature_detected!("dpb")); + 
println!("dpb2: {:?}", is_aarch64_feature_detected!("dpb2")); + println!("frintts: {:?}", is_aarch64_feature_detected!("frintts")); + println!("i8mm: {:?}", is_aarch64_feature_detected!("i8mm")); + println!("bf16: {:?}", is_aarch64_feature_detected!("bf16")); + println!("bti: {:?}", is_aarch64_feature_detected!("bti")); + println!("fcma: {:?}", is_aarch64_feature_detected!("fcma")); + println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {:?}", is_aarch64_feature_detected!("sha3")); +} + #[test] #[cfg(all(target_arch = "powerpc", target_os = "linux"))] fn powerpc_linux() { @@ -201,6 +233,7 @@ fn x86_all() { "avx512vp2intersect {:?}", is_x86_feature_detected!("avx512vp2intersect") ); + println!("avx512fp16 {:?}", is_x86_feature_detected!("avx512fp16")); println!("f16c: {:?}", is_x86_feature_detected!("f16c")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("bmi1: {:?}", is_x86_feature_detected!("bmi1")); diff --git a/crates/std_detect/tests/macro_trailing_commas.rs b/crates/std_detect/tests/macro_trailing_commas.rs index 8304b225f5..d37629ec0a 100644 --- a/crates/std_detect/tests/macro_trailing_commas.rs +++ b/crates/std_detect/tests/macro_trailing_commas.rs @@ -1,3 +1,15 @@ +#![allow(internal_features)] +#![cfg_attr( + any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" + ), + feature(stdarch_internal) +)] #![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] #![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))] #![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))] diff --git a/crates/std_detect/tests/x86-specific.rs b/crates/std_detect/tests/x86-specific.rs index 54bcab7b1e..ae7f677ed4 100644 --- 
a/crates/std_detect/tests/x86-specific.rs +++ b/crates/std_detect/tests/x86-specific.rs @@ -1,4 +1,6 @@ #![cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#![allow(internal_features)] +#![feature(stdarch_internal)] extern crate cupid; #[macro_use] @@ -49,6 +51,7 @@ fn dump() { "avx512vp2intersect {:?}", is_x86_feature_detected!("avx512vp2intersect") ); + println!("avx512fp16 {:?}", is_x86_feature_detected!("avx512fp16")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("abm: {:?}", is_x86_feature_detected!("abm")); println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); diff --git a/examples/connect5.rs b/examples/connect5.rs index 53e9b8124d..a569689fad 100644 --- a/examples/connect5.rs +++ b/examples/connect5.rs @@ -28,9 +28,10 @@ //! You should see a game self-playing. In the end of the game, it shows the average time for //! each move. +#![allow(internal_features)] #![feature(avx512_target_feature)] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512))] -#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512))] +#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512, stdarch_internal))] +#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512, stdarch_internal))] #![feature(stmt_expr_attributes)] use rand::seq::SliceRandom; diff --git a/examples/hex.rs b/examples/hex.rs index 490556e8bf..b73a306f5f 100644 --- a/examples/hex.rs +++ b/examples/hex.rs @@ -12,8 +12,13 @@ //! //! and you should see `746573740a` get printed out. +#![allow(internal_features)] #![feature(wasm_target_feature)] #![cfg_attr(test, feature(test))] +#![cfg_attr( + any(target_arch = "x86", target_arch = "x86_64"), + feature(stdarch_internal) +)] #![allow( clippy::unwrap_used, clippy::print_stdout,