diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index de629914ab..a7c784c1ff 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -43,14 +43,6 @@ unsafe extern "unadjusted" { fn __lasx_xvsrlri_w(a: __v8i32, b: u32) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvsrlri.d"] fn __lasx_xvsrlri_d(a: __v4i64, b: u32) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvbitclr.b"] - fn __lasx_xvbitclr_b(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvbitclr.h"] - fn __lasx_xvbitclr_h(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvbitclr.w"] - fn __lasx_xvbitclr_w(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvbitclr.d"] - fn __lasx_xvbitclr_d(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvbitclri.b"] fn __lasx_xvbitclri_b(a: __v32u8, b: u32) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvbitclri.h"] @@ -59,14 +51,6 @@ unsafe extern "unadjusted" { fn __lasx_xvbitclri_w(a: __v8u32, b: u32) -> __v8u32; #[link_name = "llvm.loongarch.lasx.xvbitclri.d"] fn __lasx_xvbitclri_d(a: __v4u64, b: u32) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvbitset.b"] - fn __lasx_xvbitset_b(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvbitset.h"] - fn __lasx_xvbitset_h(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvbitset.w"] - fn __lasx_xvbitset_w(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvbitset.d"] - fn __lasx_xvbitset_d(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvbitseti.b"] fn __lasx_xvbitseti_b(a: __v32u8, b: u32) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvbitseti.h"] @@ -75,14 +59,6 @@ unsafe extern "unadjusted" { fn __lasx_xvbitseti_w(a: __v8u32, b: u32) -> __v8u32; #[link_name = "llvm.loongarch.lasx.xvbitseti.d"] fn __lasx_xvbitseti_d(a: __v4u64, b: u32) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvbitrev.b"] - fn __lasx_xvbitrev_b(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvbitrev.h"] - fn __lasx_xvbitrev_h(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvbitrev.w"] - fn __lasx_xvbitrev_w(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvbitrev.d"] - fn __lasx_xvbitrev_d(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvbitrevi.b"] fn __lasx_xvbitrevi_b(a: __v32u8, b: u32) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvbitrevi.h"] @@ -1285,34 +1261,6 @@ pub fn lasx_xvsrlri_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvsrlri_d(transmute(a), IMM6)) } } -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_b(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_h(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_w(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_d(transmute(a), transmute(b))) } -} - #[inline(always)] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(1)] @@ -1349,34 +1297,6 @@ pub fn lasx_xvbitclri_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvbitclri_d(transmute(a), IMM6)) } } -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_b(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_h(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_w(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_d(transmute(a), transmute(b))) } -} - #[inline(always)] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(1)] @@ -1413,34 +1333,6 @@ pub fn lasx_xvbitseti_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvbitseti_d(transmute(a), IMM6)) } } -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_b(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_h(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_w(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_d(transmute(a), transmute(b))) } -} - #[inline(always)] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(1)] diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs index 0021d7605f..5f306502d9 100644 --- a/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -1,201 +1,213 @@ //! LoongArch64 LASX intrinsics - intrinsics::simd implementation -use super::super::{simd::*, *}; -use crate::core_arch::simd::*; -use crate::intrinsics::simd::*; +use super::super::{simd as ls, simd::*, *}; +use crate::core_arch::simd::{self as cs, *}; +use crate::intrinsics::simd as is; use crate::mem::transmute; -impl_vv!("lasx", lasx_xvpcnt_b, simd_ctpop, m256i, i8x32); -impl_vv!("lasx", lasx_xvpcnt_h, simd_ctpop, m256i, i16x16); -impl_vv!("lasx", lasx_xvpcnt_w, simd_ctpop, m256i, i32x8); -impl_vv!("lasx", lasx_xvpcnt_d, simd_ctpop, m256i, i64x4); -impl_vv!("lasx", lasx_xvclz_b, simd_ctlz, m256i, i8x32); -impl_vv!("lasx", lasx_xvclz_h, simd_ctlz, m256i, i16x16); -impl_vv!("lasx", lasx_xvclz_w, simd_ctlz, m256i, i32x8); -impl_vv!("lasx", lasx_xvclz_d, simd_ctlz, m256i, i64x4); -impl_vv!("lasx", lasx_xvneg_b, simd_neg, m256i, i8x32); -impl_vv!("lasx", lasx_xvneg_h, simd_neg, m256i, i16x16); -impl_vv!("lasx", lasx_xvneg_w, simd_neg, m256i, i32x8); -impl_vv!("lasx", lasx_xvneg_d, simd_neg, m256i, i64x4); -impl_vv!("lasx", lasx_xvfsqrt_s, simd_fsqrt, m256, f32x8); -impl_vv!("lasx", lasx_xvfsqrt_d, simd_fsqrt, m256d, f64x4); +impl_vv!("lasx", lasx_xvpcnt_b, is::simd_ctpop, m256i, i8x32); +impl_vv!("lasx", lasx_xvpcnt_h, is::simd_ctpop, m256i, i16x16); +impl_vv!("lasx", lasx_xvpcnt_w, is::simd_ctpop, m256i, i32x8); +impl_vv!("lasx", lasx_xvpcnt_d, is::simd_ctpop, m256i, i64x4); +impl_vv!("lasx", lasx_xvclz_b, is::simd_ctlz, m256i, i8x32); +impl_vv!("lasx", lasx_xvclz_h, is::simd_ctlz, m256i, i16x16); +impl_vv!("lasx", lasx_xvclz_w, is::simd_ctlz, m256i, i32x8); +impl_vv!("lasx", lasx_xvclz_d, is::simd_ctlz, m256i, i64x4); +impl_vv!("lasx", lasx_xvneg_b, is::simd_neg, m256i, i8x32); +impl_vv!("lasx", lasx_xvneg_h, is::simd_neg, m256i, i16x16); +impl_vv!("lasx", lasx_xvneg_w, is::simd_neg, m256i, i32x8); +impl_vv!("lasx", lasx_xvneg_d, is::simd_neg, m256i, i64x4); +impl_vv!("lasx", lasx_xvfsqrt_s, is::simd_fsqrt, m256, f32x8); +impl_vv!("lasx", lasx_xvfsqrt_d, is::simd_fsqrt, m256d, f64x4); -impl_gv!("lasx", lasx_xvreplgr2vr_b, simdl_splat, m256i, i8x32, i32); -impl_gv!("lasx", lasx_xvreplgr2vr_h, simdl_splat, m256i, i16x16, i32); -impl_gv!("lasx", lasx_xvreplgr2vr_w, simdl_splat, m256i, i32x8, i32); -impl_gv!("lasx", lasx_xvreplgr2vr_d, simdl_splat, m256i, i64x4, i64); +impl_gv!("lasx", lasx_xvreplgr2vr_b, ls::simd_splat, m256i, i8x32, i32); +impl_gv!("lasx", lasx_xvreplgr2vr_h, ls::simd_splat, m256i, i16x16, i32); +impl_gv!("lasx", lasx_xvreplgr2vr_w, ls::simd_splat, m256i, i32x8, i32); +impl_gv!("lasx", lasx_xvreplgr2vr_d, ls::simd_splat, m256i, i64x4, i64); -impl_sv!("lasx", lasx_xvrepli_b, simdl_splat, m256i, i8x32, 10); -impl_sv!("lasx", lasx_xvrepli_h, simdl_splat, m256i, i16x16, 10); -impl_sv!("lasx", lasx_xvrepli_w, simdl_splat, m256i, i32x8, 10); -impl_sv!("lasx", lasx_xvrepli_d, simdl_splat, m256i, i64x4, 10); +impl_sv!("lasx", lasx_xvrepli_b, ls::simd_splat, m256i, i8x32, 10); +impl_sv!("lasx", lasx_xvrepli_h, ls::simd_splat, m256i, i16x16, 10); +impl_sv!("lasx", lasx_xvrepli_w, ls::simd_splat, m256i, i32x8, 10); +impl_sv!("lasx", lasx_xvrepli_d, ls::simd_splat, m256i, i64x4, 10); -impl_vvv!("lasx", lasx_xvadd_b, simd_add, m256i, i8x32); -impl_vvv!("lasx", lasx_xvadd_h, simd_add, m256i, i16x16); -impl_vvv!("lasx", lasx_xvadd_w, simd_add, m256i, i32x8); -impl_vvv!("lasx", lasx_xvadd_d, simd_add, m256i, i64x4); -impl_vvv!("lasx", lasx_xvsub_b, simd_sub, m256i, i8x32); -impl_vvv!("lasx", lasx_xvsub_h, simd_sub, m256i, i16x16); -impl_vvv!("lasx", lasx_xvsub_w, simd_sub, m256i, i32x8); -impl_vvv!("lasx", lasx_xvsub_d, simd_sub, m256i, i64x4); -impl_vvv!("lasx", lasx_xvmax_b, simd_imax, m256i, i8x32); -impl_vvv!("lasx", lasx_xvmax_h, simd_imax, m256i, i16x16); -impl_vvv!("lasx", lasx_xvmax_w, simd_imax, m256i, i32x8); -impl_vvv!("lasx", lasx_xvmax_d, simd_imax, m256i, i64x4); -impl_vvv!("lasx", lasx_xvmax_bu, simd_imax, m256i, u8x32); -impl_vvv!("lasx", lasx_xvmax_hu, simd_imax, m256i, u16x16); -impl_vvv!("lasx", lasx_xvmax_wu, simd_imax, m256i, u32x8); -impl_vvv!("lasx", lasx_xvmax_du, simd_imax, m256i, u64x4); -impl_vvv!("lasx", lasx_xvmin_b, simd_imin, m256i, i8x32); -impl_vvv!("lasx", lasx_xvmin_h, simd_imin, m256i, i16x16); -impl_vvv!("lasx", lasx_xvmin_w, simd_imin, m256i, i32x8); -impl_vvv!("lasx", lasx_xvmin_d, simd_imin, m256i, i64x4); -impl_vvv!("lasx", lasx_xvmin_bu, simd_imin, m256i, u8x32); -impl_vvv!("lasx", lasx_xvmin_hu, simd_imin, m256i, u16x16); -impl_vvv!("lasx", lasx_xvmin_wu, simd_imin, m256i, u32x8); -impl_vvv!("lasx", lasx_xvmin_du, simd_imin, m256i, u64x4); -impl_vvv!("lasx", lasx_xvseq_b, simd_eq, m256i, i8x32); -impl_vvv!("lasx", lasx_xvseq_h, simd_eq, m256i, i16x16); -impl_vvv!("lasx", lasx_xvseq_w, simd_eq, m256i, i32x8); -impl_vvv!("lasx", lasx_xvseq_d, simd_eq, m256i, i64x4); -impl_vvv!("lasx", lasx_xvslt_b, simd_lt, m256i, i8x32); -impl_vvv!("lasx", lasx_xvslt_h, simd_lt, m256i, i16x16); -impl_vvv!("lasx", lasx_xvslt_w, simd_lt, m256i, i32x8); -impl_vvv!("lasx", lasx_xvslt_d, simd_lt, m256i, i64x4); -impl_vvv!("lasx", lasx_xvslt_bu, simd_lt, m256i, u8x32); -impl_vvv!("lasx", lasx_xvslt_hu, simd_lt, m256i, u16x16); -impl_vvv!("lasx", lasx_xvslt_wu, simd_lt, m256i, u32x8); -impl_vvv!("lasx", lasx_xvslt_du, simd_lt, m256i, u64x4); -impl_vvv!("lasx", lasx_xvsle_b, simd_le, m256i, i8x32); -impl_vvv!("lasx", lasx_xvsle_h, simd_le, m256i, i16x16); -impl_vvv!("lasx", lasx_xvsle_w, simd_le, m256i, i32x8); -impl_vvv!("lasx", lasx_xvsle_d, simd_le, m256i, i64x4); -impl_vvv!("lasx", lasx_xvsle_bu, simd_le, m256i, u8x32); -impl_vvv!("lasx", lasx_xvsle_hu, simd_le, m256i, u16x16); -impl_vvv!("lasx", lasx_xvsle_wu, simd_le, m256i, u32x8); -impl_vvv!("lasx", lasx_xvsle_du, simd_le, m256i, u64x4); -impl_vvv!("lasx", lasx_xvmul_b, simd_mul, m256i, i8x32); -impl_vvv!("lasx", lasx_xvmul_h, simd_mul, m256i, i16x16); -impl_vvv!("lasx", lasx_xvmul_w, simd_mul, m256i, i32x8); -impl_vvv!("lasx", lasx_xvmul_d, simd_mul, m256i, i64x4); -impl_vvv!("lasx", lasx_xvdiv_b, simd_div, m256i, i8x32); -impl_vvv!("lasx", lasx_xvdiv_h, simd_div, m256i, i16x16); -impl_vvv!("lasx", lasx_xvdiv_w, simd_div, m256i, i32x8); -impl_vvv!("lasx", lasx_xvdiv_d, simd_div, m256i, i64x4); -impl_vvv!("lasx", lasx_xvdiv_bu, simd_div, m256i, u8x32); -impl_vvv!("lasx", lasx_xvdiv_hu, simd_div, m256i, u16x16); -impl_vvv!("lasx", lasx_xvdiv_wu, simd_div, m256i, u32x8); -impl_vvv!("lasx", lasx_xvdiv_du, simd_div, m256i, u64x4); -impl_vvv!("lasx", lasx_xvmod_b, simd_rem, m256i, i8x32); -impl_vvv!("lasx", lasx_xvmod_h, simd_rem, m256i, i16x16); -impl_vvv!("lasx", lasx_xvmod_w, simd_rem, m256i, i32x8); -impl_vvv!("lasx", lasx_xvmod_d, simd_rem, m256i, i64x4); -impl_vvv!("lasx", lasx_xvmod_bu, simd_rem, m256i, u8x32); -impl_vvv!("lasx", lasx_xvmod_hu, simd_rem, m256i, u16x16); -impl_vvv!("lasx", lasx_xvmod_wu, simd_rem, m256i, u32x8); -impl_vvv!("lasx", lasx_xvmod_du, simd_rem, m256i, u64x4); -impl_vvv!("lasx", lasx_xvand_v, simd_and, m256i, u8x32); -impl_vvv!("lasx", lasx_xvandn_v, simdl_andn, m256i, u8x32); -impl_vvv!("lasx", lasx_xvor_v, simd_or, m256i, u8x32); -impl_vvv!("lasx", lasx_xvorn_v, simdl_orn, m256i, u8x32); -impl_vvv!("lasx", lasx_xvnor_v, simdl_nor, m256i, u8x32); -impl_vvv!("lasx", lasx_xvxor_v, simd_xor, m256i, u8x32); -impl_vvv!("lasx", lasx_xvfadd_s, simd_add, m256, f32x8); -impl_vvv!("lasx", lasx_xvfadd_d, simd_add, m256d, f64x4); -impl_vvv!("lasx", lasx_xvfsub_s, simd_sub, m256, f32x8); -impl_vvv!("lasx", lasx_xvfsub_d, simd_sub, m256d, f64x4); -impl_vvv!("lasx", lasx_xvfmul_s, simd_mul, m256, f32x8); -impl_vvv!("lasx", lasx_xvfmul_d, simd_mul, m256d, f64x4); -impl_vvv!("lasx", lasx_xvfdiv_s, simd_div, m256, f32x8); -impl_vvv!("lasx", lasx_xvfdiv_d, simd_div, m256d, f64x4); -impl_vvv!("lasx", lasx_xvsll_b, simdl_shl, m256i, i8x32); -impl_vvv!("lasx", lasx_xvsll_h, simdl_shl, m256i, i16x16); -impl_vvv!("lasx", lasx_xvsll_w, simdl_shl, m256i, i32x8); -impl_vvv!("lasx", lasx_xvsll_d, simdl_shl, m256i, i64x4); -impl_vvv!("lasx", lasx_xvsra_b, simdl_shr, m256i, i8x32); -impl_vvv!("lasx", lasx_xvsra_h, simdl_shr, m256i, i16x16); -impl_vvv!("lasx", lasx_xvsra_w, simdl_shr, m256i, i32x8); -impl_vvv!("lasx", lasx_xvsra_d, simdl_shr, m256i, i64x4); -impl_vvv!("lasx", lasx_xvsrl_b, simdl_shr, m256i, u8x32); -impl_vvv!("lasx", lasx_xvsrl_h, simdl_shr, m256i, u16x16); -impl_vvv!("lasx", lasx_xvsrl_w, simdl_shr, m256i, u32x8); -impl_vvv!("lasx", lasx_xvsrl_d, simdl_shr, m256i, u64x4); +impl_vvv!("lasx", lasx_xvadd_b, is::simd_add, m256i, i8x32); +impl_vvv!("lasx", lasx_xvadd_h, is::simd_add, m256i, i16x16); +impl_vvv!("lasx", lasx_xvadd_w, is::simd_add, m256i, i32x8); +impl_vvv!("lasx", lasx_xvadd_d, is::simd_add, m256i, i64x4); +impl_vvv!("lasx", lasx_xvsub_b, is::simd_sub, m256i, i8x32); +impl_vvv!("lasx", lasx_xvsub_h, is::simd_sub, m256i, i16x16); +impl_vvv!("lasx", lasx_xvsub_w, is::simd_sub, m256i, i32x8); +impl_vvv!("lasx", lasx_xvsub_d, is::simd_sub, m256i, i64x4); +impl_vvv!("lasx", lasx_xvmax_b, cs::simd_imax, m256i, i8x32); +impl_vvv!("lasx", lasx_xvmax_h, cs::simd_imax, m256i, i16x16); +impl_vvv!("lasx", lasx_xvmax_w, cs::simd_imax, m256i, i32x8); +impl_vvv!("lasx", lasx_xvmax_d, cs::simd_imax, m256i, i64x4); +impl_vvv!("lasx", lasx_xvmax_bu, cs::simd_imax, m256i, u8x32); +impl_vvv!("lasx", lasx_xvmax_hu, cs::simd_imax, m256i, u16x16); +impl_vvv!("lasx", lasx_xvmax_wu, cs::simd_imax, m256i, u32x8); +impl_vvv!("lasx", lasx_xvmax_du, cs::simd_imax, m256i, u64x4); +impl_vvv!("lasx", lasx_xvmin_b, cs::simd_imin, m256i, i8x32); +impl_vvv!("lasx", lasx_xvmin_h, cs::simd_imin, m256i, i16x16); +impl_vvv!("lasx", lasx_xvmin_w, cs::simd_imin, m256i, i32x8); +impl_vvv!("lasx", lasx_xvmin_d, cs::simd_imin, m256i, i64x4); +impl_vvv!("lasx", lasx_xvmin_bu, cs::simd_imin, m256i, u8x32); +impl_vvv!("lasx", lasx_xvmin_hu, cs::simd_imin, m256i, u16x16); +impl_vvv!("lasx", lasx_xvmin_wu, cs::simd_imin, m256i, u32x8); +impl_vvv!("lasx", lasx_xvmin_du, cs::simd_imin, m256i, u64x4); +impl_vvv!("lasx", lasx_xvseq_b, is::simd_eq, m256i, i8x32); +impl_vvv!("lasx", lasx_xvseq_h, is::simd_eq, m256i, i16x16); +impl_vvv!("lasx", lasx_xvseq_w, is::simd_eq, m256i, i32x8); +impl_vvv!("lasx", lasx_xvseq_d, is::simd_eq, m256i, i64x4); +impl_vvv!("lasx", lasx_xvslt_b, is::simd_lt, m256i, i8x32); +impl_vvv!("lasx", lasx_xvslt_h, is::simd_lt, m256i, i16x16); +impl_vvv!("lasx", lasx_xvslt_w, is::simd_lt, m256i, i32x8); +impl_vvv!("lasx", lasx_xvslt_d, is::simd_lt, m256i, i64x4); +impl_vvv!("lasx", lasx_xvslt_bu, is::simd_lt, m256i, u8x32); +impl_vvv!("lasx", lasx_xvslt_hu, is::simd_lt, m256i, u16x16); +impl_vvv!("lasx", lasx_xvslt_wu, is::simd_lt, m256i, u32x8); +impl_vvv!("lasx", lasx_xvslt_du, is::simd_lt, m256i, u64x4); +impl_vvv!("lasx", lasx_xvsle_b, is::simd_le, m256i, i8x32); +impl_vvv!("lasx", lasx_xvsle_h, is::simd_le, m256i, i16x16); +impl_vvv!("lasx", lasx_xvsle_w, is::simd_le, m256i, i32x8); +impl_vvv!("lasx", lasx_xvsle_d, is::simd_le, m256i, i64x4); +impl_vvv!("lasx", lasx_xvsle_bu, is::simd_le, m256i, u8x32); +impl_vvv!("lasx", lasx_xvsle_hu, is::simd_le, m256i, u16x16); +impl_vvv!("lasx", lasx_xvsle_wu, is::simd_le, m256i, u32x8); +impl_vvv!("lasx", lasx_xvsle_du, is::simd_le, m256i, u64x4); +impl_vvv!("lasx", lasx_xvmul_b, is::simd_mul, m256i, i8x32); +impl_vvv!("lasx", lasx_xvmul_h, is::simd_mul, m256i, i16x16); +impl_vvv!("lasx", lasx_xvmul_w, is::simd_mul, m256i, i32x8); +impl_vvv!("lasx", lasx_xvmul_d, is::simd_mul, m256i, i64x4); +impl_vvv!("lasx", lasx_xvdiv_b, is::simd_div, m256i, i8x32); +impl_vvv!("lasx", lasx_xvdiv_h, is::simd_div, m256i, i16x16); +impl_vvv!("lasx", lasx_xvdiv_w, is::simd_div, m256i, i32x8); +impl_vvv!("lasx", lasx_xvdiv_d, is::simd_div, m256i, i64x4); +impl_vvv!("lasx", lasx_xvdiv_bu, is::simd_div, m256i, u8x32); +impl_vvv!("lasx", lasx_xvdiv_hu, is::simd_div, m256i, u16x16); +impl_vvv!("lasx", lasx_xvdiv_wu, is::simd_div, m256i, u32x8); +impl_vvv!("lasx", lasx_xvdiv_du, is::simd_div, m256i, u64x4); +impl_vvv!("lasx", lasx_xvmod_b, is::simd_rem, m256i, i8x32); +impl_vvv!("lasx", lasx_xvmod_h, is::simd_rem, m256i, i16x16); +impl_vvv!("lasx", lasx_xvmod_w, is::simd_rem, m256i, i32x8); +impl_vvv!("lasx", lasx_xvmod_d, is::simd_rem, m256i, i64x4); +impl_vvv!("lasx", lasx_xvmod_bu, is::simd_rem, m256i, u8x32); +impl_vvv!("lasx", lasx_xvmod_hu, is::simd_rem, m256i, u16x16); +impl_vvv!("lasx", lasx_xvmod_wu, is::simd_rem, m256i, u32x8); +impl_vvv!("lasx", lasx_xvmod_du, is::simd_rem, m256i, u64x4); +impl_vvv!("lasx", lasx_xvand_v, is::simd_and, m256i, u8x32); +impl_vvv!("lasx", lasx_xvandn_v, ls::simd_andn, m256i, u8x32); +impl_vvv!("lasx", lasx_xvor_v, is::simd_or, m256i, u8x32); +impl_vvv!("lasx", lasx_xvorn_v, ls::simd_orn, m256i, u8x32); +impl_vvv!("lasx", lasx_xvnor_v, ls::simd_nor, m256i, u8x32); +impl_vvv!("lasx", lasx_xvxor_v, is::simd_xor, m256i, u8x32); +impl_vvv!("lasx", lasx_xvfadd_s, is::simd_add, m256, f32x8); +impl_vvv!("lasx", lasx_xvfadd_d, is::simd_add, m256d, f64x4); +impl_vvv!("lasx", lasx_xvfsub_s, is::simd_sub, m256, f32x8); +impl_vvv!("lasx", lasx_xvfsub_d, is::simd_sub, m256d, f64x4); +impl_vvv!("lasx", lasx_xvfmul_s, is::simd_mul, m256, f32x8); +impl_vvv!("lasx", lasx_xvfmul_d, is::simd_mul, m256d, f64x4); +impl_vvv!("lasx", lasx_xvfdiv_s, is::simd_div, m256, f32x8); +impl_vvv!("lasx", lasx_xvfdiv_d, is::simd_div, m256d, f64x4); +impl_vvv!("lasx", lasx_xvsll_b, ls::simd_shl, m256i, i8x32); +impl_vvv!("lasx", lasx_xvsll_h, ls::simd_shl, m256i, i16x16); +impl_vvv!("lasx", lasx_xvsll_w, ls::simd_shl, m256i, i32x8); +impl_vvv!("lasx", lasx_xvsll_d, ls::simd_shl, m256i, i64x4); +impl_vvv!("lasx", lasx_xvsra_b, ls::simd_shr, m256i, i8x32); +impl_vvv!("lasx", lasx_xvsra_h, ls::simd_shr, m256i, i16x16); +impl_vvv!("lasx", lasx_xvsra_w, ls::simd_shr, m256i, i32x8); +impl_vvv!("lasx", lasx_xvsra_d, ls::simd_shr, m256i, i64x4); +impl_vvv!("lasx", lasx_xvsrl_b, ls::simd_shr, m256i, u8x32); +impl_vvv!("lasx", lasx_xvsrl_h, ls::simd_shr, m256i, u16x16); +impl_vvv!("lasx", lasx_xvsrl_w, ls::simd_shr, m256i, u32x8); +impl_vvv!("lasx", lasx_xvsrl_d, ls::simd_shr, m256i, u64x4); +impl_vvv!("lasx", lasx_xvbitclr_b, ls::simd_bitclr, m256i, u8x32); +impl_vvv!("lasx", lasx_xvbitclr_h, ls::simd_bitclr, m256i, u16x16); +impl_vvv!("lasx", lasx_xvbitclr_w, ls::simd_bitclr, m256i, u32x8); +impl_vvv!("lasx", lasx_xvbitclr_d, ls::simd_bitclr, m256i, u64x4); +impl_vvv!("lasx", lasx_xvbitset_b, ls::simd_bitset, m256i, u8x32); +impl_vvv!("lasx", lasx_xvbitset_h, ls::simd_bitset, m256i, u16x16); +impl_vvv!("lasx", lasx_xvbitset_w, ls::simd_bitset, m256i, u32x8); +impl_vvv!("lasx", lasx_xvbitset_d, ls::simd_bitset, m256i, u64x4); +impl_vvv!("lasx", lasx_xvbitrev_b, ls::simd_bitrev, m256i, u8x32); +impl_vvv!("lasx", lasx_xvbitrev_h, ls::simd_bitrev, m256i, u16x16); +impl_vvv!("lasx", lasx_xvbitrev_w, ls::simd_bitrev, m256i, u32x8); +impl_vvv!("lasx", lasx_xvbitrev_d, ls::simd_bitrev, m256i, u64x4); -impl_vuv!("lasx", lasx_xvslli_b, simd_shl, m256i, i8x32); -impl_vuv!("lasx", lasx_xvslli_h, simd_shl, m256i, i16x16); -impl_vuv!("lasx", lasx_xvslli_w, simd_shl, m256i, i32x8); -impl_vuv!("lasx", lasx_xvslli_d, simd_shl, m256i, i64x4); -impl_vuv!("lasx", lasx_xvsrai_b, simd_shr, m256i, i8x32); -impl_vuv!("lasx", lasx_xvsrai_h, simd_shr, m256i, i16x16); -impl_vuv!("lasx", lasx_xvsrai_w, simd_shr, m256i, i32x8); -impl_vuv!("lasx", lasx_xvsrai_d, simd_shr, m256i, i64x4); -impl_vuv!("lasx", lasx_xvsrli_b, simd_shr, m256i, u8x32); -impl_vuv!("lasx", lasx_xvsrli_h, simd_shr, m256i, u16x16); -impl_vuv!("lasx", lasx_xvsrli_w, simd_shr, m256i, u32x8); -impl_vuv!("lasx", lasx_xvsrli_d, simd_shr, m256i, u64x4); -impl_vuv!("lasx", lasx_xvaddi_bu, simd_add, m256i, u8x32, 5); -impl_vuv!("lasx", lasx_xvaddi_hu, simd_add, m256i, u16x16, 5); -impl_vuv!("lasx", lasx_xvaddi_wu, simd_add, m256i, u32x8, 5); -impl_vuv!("lasx", lasx_xvaddi_du, simd_add, m256i, u64x4, 5); -impl_vuv!("lasx", lasx_xvslti_bu, simd_lt, m256i, u8x32, 5); -impl_vuv!("lasx", lasx_xvslti_hu, simd_lt, m256i, u16x16, 5); -impl_vuv!("lasx", lasx_xvslti_wu, simd_lt, m256i, u32x8, 5); -impl_vuv!("lasx", lasx_xvslti_du, simd_lt, m256i, u64x4, 5); -impl_vuv!("lasx", lasx_xvslei_bu, simd_le, m256i, u8x32, 5); -impl_vuv!("lasx", lasx_xvslei_hu, simd_le, m256i, u16x16, 5); -impl_vuv!("lasx", lasx_xvslei_wu, simd_le, m256i, u32x8, 5); -impl_vuv!("lasx", lasx_xvslei_du, simd_le, m256i, u64x4, 5); -impl_vuv!("lasx", lasx_xvmaxi_bu, simd_imax, m256i, u8x32, 5); -impl_vuv!("lasx", lasx_xvmaxi_hu, simd_imax, m256i, u16x16, 5); -impl_vuv!("lasx", lasx_xvmaxi_wu, simd_imax, m256i, u32x8, 5); -impl_vuv!("lasx", lasx_xvmaxi_du, simd_imax, m256i, u64x4, 5); -impl_vuv!("lasx", lasx_xvmini_bu, simd_imin, m256i, u8x32, 5); -impl_vuv!("lasx", lasx_xvmini_hu, simd_imin, m256i, u16x16, 5); -impl_vuv!("lasx", lasx_xvmini_wu, simd_imin, m256i, u32x8, 5); -impl_vuv!("lasx", lasx_xvmini_du, simd_imin, m256i, u64x4, 5); +impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32); +impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16); +impl_vuv!("lasx", lasx_xvslli_w, is::simd_shl, m256i, i32x8); +impl_vuv!("lasx", lasx_xvslli_d, is::simd_shl, m256i, i64x4); +impl_vuv!("lasx", lasx_xvsrai_b, is::simd_shr, m256i, i8x32); +impl_vuv!("lasx", lasx_xvsrai_h, is::simd_shr, m256i, i16x16); +impl_vuv!("lasx", lasx_xvsrai_w, is::simd_shr, m256i, i32x8); +impl_vuv!("lasx", lasx_xvsrai_d, is::simd_shr, m256i, i64x4); +impl_vuv!("lasx", lasx_xvsrli_b, is::simd_shr, m256i, u8x32); +impl_vuv!("lasx", lasx_xvsrli_h, is::simd_shr, m256i, u16x16); +impl_vuv!("lasx", lasx_xvsrli_w, is::simd_shr, m256i, u32x8); +impl_vuv!("lasx", lasx_xvsrli_d, is::simd_shr, m256i, u64x4); +impl_vuv!("lasx", lasx_xvaddi_bu, is::simd_add, m256i, u8x32, 5); +impl_vuv!("lasx", lasx_xvaddi_hu, is::simd_add, m256i, u16x16, 5); +impl_vuv!("lasx", lasx_xvaddi_wu, is::simd_add, m256i, u32x8, 5); +impl_vuv!("lasx", lasx_xvaddi_du, is::simd_add, m256i, u64x4, 5); +impl_vuv!("lasx", lasx_xvslti_bu, is::simd_lt, m256i, u8x32, 5); +impl_vuv!("lasx", lasx_xvslti_hu, is::simd_lt, m256i, u16x16, 5); +impl_vuv!("lasx", lasx_xvslti_wu, is::simd_lt, m256i, u32x8, 5); +impl_vuv!("lasx", lasx_xvslti_du, is::simd_lt, m256i, u64x4, 5); +impl_vuv!("lasx", lasx_xvslei_bu, is::simd_le, m256i, u8x32, 5); +impl_vuv!("lasx", lasx_xvslei_hu, is::simd_le, m256i, u16x16, 5); +impl_vuv!("lasx", lasx_xvslei_wu, is::simd_le, m256i, u32x8, 5); +impl_vuv!("lasx", lasx_xvslei_du, is::simd_le, m256i, u64x4, 5); +impl_vuv!("lasx", lasx_xvmaxi_bu, cs::simd_imax, m256i, u8x32, 5); +impl_vuv!("lasx", lasx_xvmaxi_hu, cs::simd_imax, m256i, u16x16, 5); +impl_vuv!("lasx", lasx_xvmaxi_wu, cs::simd_imax, m256i, u32x8, 5); +impl_vuv!("lasx", lasx_xvmaxi_du, cs::simd_imax, m256i, u64x4, 5); +impl_vuv!("lasx", lasx_xvmini_bu, cs::simd_imin, m256i, u8x32, 5); +impl_vuv!("lasx", lasx_xvmini_hu, cs::simd_imin, m256i, u16x16, 5); +impl_vuv!("lasx", lasx_xvmini_wu, cs::simd_imin, m256i, u32x8, 5); +impl_vuv!("lasx", lasx_xvmini_du, cs::simd_imin, m256i, u64x4, 5); -impl_vug!("lasx", lasx_xvpickve2gr_w, simd_extract, m256i, i32x8, i32, 3); -impl_vug!("lasx", lasx_xvpickve2gr_d, simd_extract, m256i, i64x4, i64, 2); -impl_vug!("lasx", lasx_xvpickve2gr_wu, simd_extract, m256i, u32x8, u32, 3); -impl_vug!("lasx", lasx_xvpickve2gr_du, simd_extract, m256i, u64x4, u64, 2); +impl_vug!("lasx", lasx_xvpickve2gr_w, is::simd_extract, m256i, i32x8, i32, 3); +impl_vug!("lasx", lasx_xvpickve2gr_d, is::simd_extract, m256i, i64x4, i64, 2); +impl_vug!("lasx", lasx_xvpickve2gr_wu, is::simd_extract, m256i, u32x8, u32, 3); +impl_vug!("lasx", lasx_xvpickve2gr_du, is::simd_extract, m256i, u64x4, u64, 2); -impl_vsv!("lasx", lasx_xvseqi_b, simd_eq, m256i, i8x32, 5); -impl_vsv!("lasx", lasx_xvseqi_h, simd_eq, m256i, i16x16, 5); -impl_vsv!("lasx", lasx_xvseqi_w, simd_eq, m256i, i32x8, 5); -impl_vsv!("lasx", lasx_xvseqi_d, simd_eq, m256i, i64x4, 5); -impl_vsv!("lasx", lasx_xvslti_b, simd_lt, m256i, i8x32, 5); -impl_vsv!("lasx", lasx_xvslti_h, simd_lt, m256i, i16x16, 5); -impl_vsv!("lasx", lasx_xvslti_w, simd_lt, m256i, i32x8, 5); -impl_vsv!("lasx", lasx_xvslti_d, simd_lt, m256i, i64x4, 5); -impl_vsv!("lasx", lasx_xvslei_b, simd_le, m256i, i8x32, 5); -impl_vsv!("lasx", lasx_xvslei_h, simd_le, m256i, i16x16, 5); -impl_vsv!("lasx", lasx_xvslei_w, simd_le, m256i, i32x8, 5); -impl_vsv!("lasx", lasx_xvslei_d, simd_le, m256i, i64x4, 5); -impl_vsv!("lasx", lasx_xvmaxi_b, simd_imax, m256i, i8x32, 5); -impl_vsv!("lasx", lasx_xvmaxi_h, simd_imax, m256i, i16x16, 5); -impl_vsv!("lasx", lasx_xvmaxi_w, simd_imax, m256i, i32x8, 5); -impl_vsv!("lasx", lasx_xvmaxi_d, simd_imax, m256i, i64x4, 5); -impl_vsv!("lasx", lasx_xvmini_b, simd_imin, m256i, i8x32, 5); -impl_vsv!("lasx", lasx_xvmini_h, simd_imin, m256i, i16x16, 5); -impl_vsv!("lasx", lasx_xvmini_w, simd_imin, m256i, i32x8, 5); -impl_vsv!("lasx", lasx_xvmini_d, simd_imin, m256i, i64x4, 5); +impl_vsv!("lasx", lasx_xvseqi_b, is::simd_eq, m256i, i8x32, 5); +impl_vsv!("lasx", lasx_xvseqi_h, is::simd_eq, m256i, i16x16, 5); +impl_vsv!("lasx", lasx_xvseqi_w, is::simd_eq, m256i, i32x8, 5); +impl_vsv!("lasx", lasx_xvseqi_d, is::simd_eq, m256i, i64x4, 5); +impl_vsv!("lasx", lasx_xvslti_b, is::simd_lt, m256i, i8x32, 5); +impl_vsv!("lasx", lasx_xvslti_h, is::simd_lt, m256i, i16x16, 5); +impl_vsv!("lasx", lasx_xvslti_w, is::simd_lt, m256i, i32x8, 5); +impl_vsv!("lasx", lasx_xvslti_d, is::simd_lt, m256i, i64x4, 5); +impl_vsv!("lasx", lasx_xvslei_b, is::simd_le, m256i, i8x32, 5); +impl_vsv!("lasx", lasx_xvslei_h, is::simd_le, m256i, i16x16, 5); +impl_vsv!("lasx", lasx_xvslei_w, is::simd_le, m256i, i32x8, 5); +impl_vsv!("lasx", lasx_xvslei_d, is::simd_le, m256i, i64x4, 5); +impl_vsv!("lasx", lasx_xvmaxi_b, cs::simd_imax, m256i, i8x32, 5); +impl_vsv!("lasx", lasx_xvmaxi_h, cs::simd_imax, m256i, i16x16, 5); +impl_vsv!("lasx", lasx_xvmaxi_w, cs::simd_imax, m256i, i32x8, 5); +impl_vsv!("lasx", lasx_xvmaxi_d, cs::simd_imax, m256i, i64x4, 5); +impl_vsv!("lasx", lasx_xvmini_b, cs::simd_imin, m256i, i8x32, 5); +impl_vsv!("lasx", lasx_xvmini_h, cs::simd_imin, m256i, i16x16, 5); +impl_vsv!("lasx", lasx_xvmini_w, cs::simd_imin, m256i, i32x8, 5); +impl_vsv!("lasx", lasx_xvmini_d, cs::simd_imin, m256i, i64x4, 5); -impl_vvvv!("lasx", lasx_xvmadd_b, simdl_madd, m256i, i8x32); -impl_vvvv!("lasx", lasx_xvmadd_h, simdl_madd, m256i, i16x16); -impl_vvvv!("lasx", lasx_xvmadd_w, simdl_madd, m256i, i32x8); -impl_vvvv!("lasx", lasx_xvmadd_d, simdl_madd, m256i, i64x4); -impl_vvvv!("lasx", lasx_xvmsub_b, simdl_msub, m256i, i8x32); -impl_vvvv!("lasx", lasx_xvmsub_h, simdl_msub, m256i, i16x16); -impl_vvvv!("lasx", lasx_xvmsub_w, simdl_msub, m256i, i32x8); -impl_vvvv!("lasx", lasx_xvmsub_d, simdl_msub, m256i, i64x4); -impl_vvvv!("lasx", lasx_xvfmadd_s, simd_fma, m256, f32x8); -impl_vvvv!("lasx", lasx_xvfmadd_d, simd_fma, m256d, f64x4); -impl_vvvv!("lasx", lasx_xvfmsub_s, simdl_fms, m256, f32x8); -impl_vvvv!("lasx", lasx_xvfmsub_d, simdl_fms, m256d, f64x4); -impl_vvvv!("lasx", lasx_xvfnmadd_s, simdl_nfma, m256, f32x8); -impl_vvvv!("lasx", lasx_xvfnmadd_d, simdl_nfma, m256d, f64x4); -impl_vvvv!("lasx", lasx_xvfnmsub_s, simdl_nfms, m256, f32x8); -impl_vvvv!("lasx", lasx_xvfnmsub_d, simdl_nfms, m256d, f64x4); +impl_vvvv!("lasx", lasx_xvmadd_b, ls::simd_madd, m256i, i8x32); +impl_vvvv!("lasx", lasx_xvmadd_h, ls::simd_madd, m256i, i16x16); +impl_vvvv!("lasx", lasx_xvmadd_w, ls::simd_madd, m256i, i32x8); +impl_vvvv!("lasx", lasx_xvmadd_d, ls::simd_madd, m256i, i64x4); +impl_vvvv!("lasx", lasx_xvmsub_b, ls::simd_msub, m256i, i8x32); +impl_vvvv!("lasx", lasx_xvmsub_h, ls::simd_msub, m256i, i16x16); +impl_vvvv!("lasx", lasx_xvmsub_w, ls::simd_msub, m256i, i32x8); +impl_vvvv!("lasx", lasx_xvmsub_d, ls::simd_msub, m256i, i64x4); +impl_vvvv!("lasx", lasx_xvfmadd_s, is::simd_fma, m256, f32x8); +impl_vvvv!("lasx", lasx_xvfmadd_d, is::simd_fma, m256d, f64x4); +impl_vvvv!("lasx", lasx_xvfmsub_s, ls::simd_fmsub, m256, f32x8); +impl_vvvv!("lasx", lasx_xvfmsub_d, ls::simd_fmsub, m256d, f64x4); +impl_vvvv!("lasx", lasx_xvfnmadd_s, ls::simd_fnmadd, m256, f32x8); +impl_vvvv!("lasx", lasx_xvfnmadd_d, ls::simd_fnmadd, m256d, f64x4); +impl_vvvv!("lasx", lasx_xvfnmsub_s, ls::simd_fnmsub, m256, f32x8); +impl_vvvv!("lasx", lasx_xvfnmsub_d, ls::simd_fnmsub, m256d, f64x4); -impl_vugv!("lasx", lasx_xvinsgr2vr_w, simd_insert, m256i, i32x8, i32, 3); -impl_vugv!("lasx", lasx_xvinsgr2vr_d, simd_insert, m256i, i64x4, i64, 2); +impl_vugv!("lasx", lasx_xvinsgr2vr_w, is::simd_insert, m256i, i32x8, i32, 3); +impl_vugv!("lasx", lasx_xvinsgr2vr_d, is::simd_insert, m256i, i64x4, i64, 2); diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index d2d77e2f3e..a53057d7bd 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -43,14 +43,6 @@ unsafe extern "unadjusted" { fn __lsx_vsrlri_w(a: __v4i32, b: u32) -> __v4i32; #[link_name = "llvm.loongarch.lsx.vsrlri.d"] fn __lsx_vsrlri_d(a: __v2i64, b: u32) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vbitclr.b"] - fn __lsx_vbitclr_b(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vbitclr.h"] - fn __lsx_vbitclr_h(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vbitclr.w"] - fn __lsx_vbitclr_w(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vbitclr.d"] - fn __lsx_vbitclr_d(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vbitclri.b"] fn __lsx_vbitclri_b(a: __v16u8, b: u32) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vbitclri.h"] @@ -59,14 +51,6 @@ unsafe extern "unadjusted" { fn __lsx_vbitclri_w(a: __v4u32, b: u32) -> __v4u32; #[link_name = "llvm.loongarch.lsx.vbitclri.d"] fn __lsx_vbitclri_d(a: __v2u64, b: u32) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vbitset.b"] - fn __lsx_vbitset_b(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vbitset.h"] - fn __lsx_vbitset_h(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vbitset.w"] - fn __lsx_vbitset_w(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vbitset.d"] - fn __lsx_vbitset_d(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vbitseti.b"] fn __lsx_vbitseti_b(a: __v16u8, b: u32) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vbitseti.h"] @@ -75,14 +59,6 @@ unsafe extern "unadjusted" { fn __lsx_vbitseti_w(a: __v4u32, b: u32) -> __v4u32; #[link_name = "llvm.loongarch.lsx.vbitseti.d"] fn __lsx_vbitseti_d(a: __v2u64, b: u32) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vbitrev.b"] - fn __lsx_vbitrev_b(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vbitrev.h"] - fn __lsx_vbitrev_h(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vbitrev.w"] - fn __lsx_vbitrev_w(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vbitrev.d"] - fn __lsx_vbitrev_d(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vbitrevi.b"] fn __lsx_vbitrevi_b(a: __v16u8, b: u32) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vbitrevi.h"] @@ -1197,34 +1173,6 @@ pub fn lsx_vsrlri_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vsrlri_d(transmute(a), IMM6)) } } -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_b(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_h(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_w(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_d(transmute(a), transmute(b))) } -} - #[inline(always)] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] @@ -1261,34 +1209,6 @@ pub fn lsx_vbitclri_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vbitclri_d(transmute(a), IMM6)) } } -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_b(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_h(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_w(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_d(transmute(a), transmute(b))) } -} - #[inline(always)] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] @@ -1325,34 +1245,6 @@ pub fn lsx_vbitseti_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vbitseti_d(transmute(a), IMM6)) } } -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_b(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_h(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_w(transmute(a), transmute(b))) } -} - -#[inline(always)] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_d(transmute(a), transmute(b))) } -} - #[inline(always)] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs index e33b1758f3..f6b8daae4e 100644 --- a/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -1,207 +1,219 @@ //! LoongArch64 LSX intrinsics - intrinsics::simd implementation -use super::super::{simd::*, *}; -use crate::core_arch::simd::*; -use crate::intrinsics::simd::*; +use super::super::{simd as ls, simd::*, *}; +use crate::core_arch::simd::{self as cs, *}; +use crate::intrinsics::simd as is; use crate::mem::transmute; -impl_vv!("lsx", lsx_vpcnt_b, simd_ctpop, m128i, i8x16); -impl_vv!("lsx", lsx_vpcnt_h, simd_ctpop, m128i, i16x8); -impl_vv!("lsx", lsx_vpcnt_w, simd_ctpop, m128i, i32x4); -impl_vv!("lsx", lsx_vpcnt_d, simd_ctpop, m128i, i64x2); -impl_vv!("lsx", lsx_vclz_b, simd_ctlz, m128i, i8x16); -impl_vv!("lsx", lsx_vclz_h, simd_ctlz, m128i, i16x8); -impl_vv!("lsx", lsx_vclz_w, simd_ctlz, m128i, i32x4); -impl_vv!("lsx", lsx_vclz_d, simd_ctlz, m128i, i64x2); -impl_vv!("lsx", lsx_vneg_b, simd_neg, m128i, i8x16); -impl_vv!("lsx", lsx_vneg_h, simd_neg, m128i, i16x8); -impl_vv!("lsx", lsx_vneg_w, simd_neg, m128i, i32x4); -impl_vv!("lsx", lsx_vneg_d, simd_neg, m128i, i64x2); -impl_vv!("lsx", lsx_vfsqrt_s, simd_fsqrt, m128, f32x4); -impl_vv!("lsx", lsx_vfsqrt_d, simd_fsqrt, m128d, f64x2); +impl_vv!("lsx", lsx_vpcnt_b, is::simd_ctpop, m128i, i8x16); +impl_vv!("lsx", lsx_vpcnt_h, is::simd_ctpop, m128i, i16x8); +impl_vv!("lsx", lsx_vpcnt_w, is::simd_ctpop, m128i, i32x4); +impl_vv!("lsx", lsx_vpcnt_d, is::simd_ctpop, m128i, i64x2); +impl_vv!("lsx", lsx_vclz_b, is::simd_ctlz, m128i, i8x16); +impl_vv!("lsx", lsx_vclz_h, is::simd_ctlz, m128i, i16x8); +impl_vv!("lsx", lsx_vclz_w, is::simd_ctlz, m128i, i32x4); +impl_vv!("lsx", lsx_vclz_d, is::simd_ctlz, m128i, i64x2); +impl_vv!("lsx", lsx_vneg_b, is::simd_neg, m128i, i8x16); +impl_vv!("lsx", lsx_vneg_h, is::simd_neg, m128i, i16x8); +impl_vv!("lsx", lsx_vneg_w, is::simd_neg, m128i, i32x4); +impl_vv!("lsx", lsx_vneg_d, is::simd_neg, m128i, i64x2); +impl_vv!("lsx", lsx_vfsqrt_s, is::simd_fsqrt, m128, f32x4); +impl_vv!("lsx", lsx_vfsqrt_d, is::simd_fsqrt, m128d, f64x2); -impl_gv!("lsx", lsx_vreplgr2vr_b, simdl_splat, m128i, i8x16, i32); -impl_gv!("lsx", lsx_vreplgr2vr_h, simdl_splat, m128i, i16x8, i32); -impl_gv!("lsx", lsx_vreplgr2vr_w, simdl_splat, m128i, i32x4, i32); -impl_gv!("lsx", lsx_vreplgr2vr_d, simdl_splat, m128i, i64x2, i64); +impl_gv!("lsx", lsx_vreplgr2vr_b, ls::simd_splat, m128i, i8x16, i32); +impl_gv!("lsx", lsx_vreplgr2vr_h, ls::simd_splat, m128i, i16x8, i32); +impl_gv!("lsx", lsx_vreplgr2vr_w, ls::simd_splat, m128i, i32x4, i32); +impl_gv!("lsx", lsx_vreplgr2vr_d, ls::simd_splat, m128i, i64x2, i64); -impl_sv!("lsx", lsx_vrepli_b, simdl_splat, m128i, i8x16, 10); -impl_sv!("lsx", lsx_vrepli_h, simdl_splat, m128i, i16x8, 10); -impl_sv!("lsx", lsx_vrepli_w, simdl_splat, m128i, i32x4, 10); -impl_sv!("lsx", lsx_vrepli_d, simdl_splat, m128i, i64x2, 10); +impl_sv!("lsx", lsx_vrepli_b, ls::simd_splat, m128i, i8x16, 10); +impl_sv!("lsx", lsx_vrepli_h, ls::simd_splat, m128i, i16x8, 10); +impl_sv!("lsx", lsx_vrepli_w, ls::simd_splat, m128i, i32x4, 10); +impl_sv!("lsx", lsx_vrepli_d, ls::simd_splat, m128i, i64x2, 10); -impl_vvv!("lsx", lsx_vadd_b, simd_add, m128i, i8x16); -impl_vvv!("lsx", lsx_vadd_h, simd_add, m128i, i16x8); -impl_vvv!("lsx", lsx_vadd_w, simd_add, m128i, i32x4); -impl_vvv!("lsx", lsx_vadd_d, simd_add, m128i, i64x2); -impl_vvv!("lsx", lsx_vsub_b, simd_sub, m128i, i8x16); -impl_vvv!("lsx", lsx_vsub_h, simd_sub, m128i, i16x8); -impl_vvv!("lsx", lsx_vsub_w, simd_sub, m128i, i32x4); -impl_vvv!("lsx", lsx_vsub_d, simd_sub, m128i, i64x2); -impl_vvv!("lsx", lsx_vmax_b, simd_imax, m128i, i8x16); -impl_vvv!("lsx", lsx_vmax_h, simd_imax, m128i, i16x8); -impl_vvv!("lsx", lsx_vmax_w, simd_imax, m128i, i32x4); -impl_vvv!("lsx", lsx_vmax_d, simd_imax, m128i, i64x2); -impl_vvv!("lsx", lsx_vmax_bu, simd_imax, m128i, u8x16); -impl_vvv!("lsx", lsx_vmax_hu, simd_imax, m128i, u16x8); -impl_vvv!("lsx", lsx_vmax_wu, simd_imax, m128i, u32x4); -impl_vvv!("lsx", lsx_vmax_du, simd_imax, m128i, u64x2); -impl_vvv!("lsx", lsx_vmin_b, simd_imin, m128i, i8x16); -impl_vvv!("lsx", lsx_vmin_h, simd_imin, m128i, i16x8); -impl_vvv!("lsx", lsx_vmin_w, simd_imin, m128i, i32x4); -impl_vvv!("lsx", lsx_vmin_d, simd_imin, m128i, i64x2); -impl_vvv!("lsx", lsx_vmin_bu, simd_imin, m128i, u8x16); -impl_vvv!("lsx", lsx_vmin_hu, simd_imin, m128i, u16x8); -impl_vvv!("lsx", lsx_vmin_wu, simd_imin, m128i, u32x4); -impl_vvv!("lsx", lsx_vmin_du, simd_imin, m128i, u64x2); -impl_vvv!("lsx", lsx_vseq_b, simd_eq, m128i, i8x16); -impl_vvv!("lsx", lsx_vseq_h, simd_eq, m128i, i16x8); -impl_vvv!("lsx", lsx_vseq_w, simd_eq, m128i, i32x4); -impl_vvv!("lsx", lsx_vseq_d, simd_eq, m128i, i64x2); -impl_vvv!("lsx", lsx_vslt_b, simd_lt, m128i, i8x16); -impl_vvv!("lsx", lsx_vslt_h, simd_lt, m128i, i16x8); -impl_vvv!("lsx", lsx_vslt_w, simd_lt, m128i, i32x4); -impl_vvv!("lsx", lsx_vslt_d, simd_lt, m128i, i64x2); -impl_vvv!("lsx", lsx_vslt_bu, simd_lt, m128i, u8x16); -impl_vvv!("lsx", lsx_vslt_hu, simd_lt, m128i, u16x8); -impl_vvv!("lsx", lsx_vslt_wu, simd_lt, m128i, u32x4); -impl_vvv!("lsx", lsx_vslt_du, simd_lt, m128i, u64x2); -impl_vvv!("lsx", lsx_vsle_b, simd_le, m128i, i8x16); -impl_vvv!("lsx", lsx_vsle_h, simd_le, m128i, i16x8); -impl_vvv!("lsx", lsx_vsle_w, simd_le, m128i, i32x4); -impl_vvv!("lsx", lsx_vsle_d, simd_le, m128i, i64x2); -impl_vvv!("lsx", lsx_vsle_bu, simd_le, m128i, u8x16); -impl_vvv!("lsx", lsx_vsle_hu, simd_le, m128i, u16x8); -impl_vvv!("lsx", lsx_vsle_wu, simd_le, m128i, u32x4); -impl_vvv!("lsx", lsx_vsle_du, simd_le, m128i, u64x2); -impl_vvv!("lsx", lsx_vmul_b, simd_mul, m128i, i8x16); -impl_vvv!("lsx", lsx_vmul_h, simd_mul, m128i, i16x8); -impl_vvv!("lsx", lsx_vmul_w, simd_mul, m128i, i32x4); -impl_vvv!("lsx", lsx_vmul_d, simd_mul, m128i, i64x2); -impl_vvv!("lsx", lsx_vdiv_b, simd_div, m128i, i8x16); -impl_vvv!("lsx", lsx_vdiv_h, simd_div, m128i, i16x8); -impl_vvv!("lsx", lsx_vdiv_w, simd_div, m128i, i32x4); -impl_vvv!("lsx", lsx_vdiv_d, simd_div, m128i, i64x2); -impl_vvv!("lsx", lsx_vdiv_bu, simd_div, m128i, u8x16); -impl_vvv!("lsx", lsx_vdiv_hu, simd_div, m128i, u16x8); -impl_vvv!("lsx", lsx_vdiv_wu, simd_div, m128i, u32x4); -impl_vvv!("lsx", lsx_vdiv_du, simd_div, m128i, u64x2); -impl_vvv!("lsx", lsx_vmod_b, simd_rem, m128i, i8x16); -impl_vvv!("lsx", lsx_vmod_h, simd_rem, m128i, i16x8); -impl_vvv!("lsx", lsx_vmod_w, simd_rem, m128i, i32x4); -impl_vvv!("lsx", lsx_vmod_d, simd_rem, m128i, i64x2); -impl_vvv!("lsx", lsx_vmod_bu, simd_rem, m128i, u8x16); -impl_vvv!("lsx", lsx_vmod_hu, simd_rem, m128i, u16x8); -impl_vvv!("lsx", lsx_vmod_wu, simd_rem, m128i, u32x4); -impl_vvv!("lsx", lsx_vmod_du, simd_rem, m128i, u64x2); -impl_vvv!("lsx", lsx_vand_v, simd_and, m128i, u8x16); -impl_vvv!("lsx", lsx_vandn_v, simdl_andn, m128i, u8x16); -impl_vvv!("lsx", lsx_vor_v, simd_or, m128i, u8x16); -impl_vvv!("lsx", lsx_vorn_v, simdl_orn, m128i, u8x16); -impl_vvv!("lsx", lsx_vnor_v, simdl_nor, m128i, u8x16); -impl_vvv!("lsx", lsx_vxor_v, simd_xor, m128i, u8x16); -impl_vvv!("lsx", lsx_vfadd_s, simd_add, m128, f32x4); -impl_vvv!("lsx", lsx_vfadd_d, simd_add, m128d, f64x2); -impl_vvv!("lsx", lsx_vfsub_s, simd_sub, m128, f32x4); -impl_vvv!("lsx", lsx_vfsub_d, simd_sub, m128d, f64x2); -impl_vvv!("lsx", lsx_vfmul_s, simd_mul, m128, f32x4); -impl_vvv!("lsx", lsx_vfmul_d, simd_mul, m128d, f64x2); -impl_vvv!("lsx", lsx_vfdiv_s, simd_div, m128, f32x4); -impl_vvv!("lsx", lsx_vfdiv_d, simd_div, m128d, f64x2); -impl_vvv!("lsx", lsx_vsll_b, simdl_shl, m128i, i8x16); -impl_vvv!("lsx", lsx_vsll_h, simdl_shl, m128i, i16x8); -impl_vvv!("lsx", lsx_vsll_w, simdl_shl, m128i, i32x4); -impl_vvv!("lsx", lsx_vsll_d, simdl_shl, m128i, i64x2); -impl_vvv!("lsx", lsx_vsra_b, simdl_shr, m128i, i8x16); -impl_vvv!("lsx", lsx_vsra_h, simdl_shr, m128i, i16x8); -impl_vvv!("lsx", lsx_vsra_w, simdl_shr, m128i, i32x4); -impl_vvv!("lsx", lsx_vsra_d, simdl_shr, m128i, i64x2); -impl_vvv!("lsx", lsx_vsrl_b, simdl_shr, m128i, u8x16); -impl_vvv!("lsx", lsx_vsrl_h, simdl_shr, m128i, u16x8); -impl_vvv!("lsx", lsx_vsrl_w, simdl_shr, m128i, u32x4); -impl_vvv!("lsx", lsx_vsrl_d, simdl_shr, m128i, u64x2); +impl_vvv!("lsx", lsx_vadd_b, is::simd_add, m128i, i8x16); +impl_vvv!("lsx", lsx_vadd_h, is::simd_add, m128i, i16x8); +impl_vvv!("lsx", lsx_vadd_w, is::simd_add, m128i, i32x4); +impl_vvv!("lsx", lsx_vadd_d, is::simd_add, m128i, i64x2); +impl_vvv!("lsx", lsx_vsub_b, is::simd_sub, m128i, i8x16); +impl_vvv!("lsx", lsx_vsub_h, is::simd_sub, m128i, i16x8); +impl_vvv!("lsx", lsx_vsub_w, is::simd_sub, m128i, i32x4); +impl_vvv!("lsx", lsx_vsub_d, is::simd_sub, m128i, i64x2); +impl_vvv!("lsx", lsx_vmax_b, cs::simd_imax, m128i, i8x16); +impl_vvv!("lsx", lsx_vmax_h, cs::simd_imax, m128i, i16x8); +impl_vvv!("lsx", lsx_vmax_w, cs::simd_imax, m128i, i32x4); +impl_vvv!("lsx", lsx_vmax_d, cs::simd_imax, m128i, i64x2); +impl_vvv!("lsx", lsx_vmax_bu, cs::simd_imax, m128i, u8x16); +impl_vvv!("lsx", lsx_vmax_hu, cs::simd_imax, m128i, u16x8); +impl_vvv!("lsx", lsx_vmax_wu, cs::simd_imax, m128i, u32x4); +impl_vvv!("lsx", lsx_vmax_du, cs::simd_imax, m128i, u64x2); +impl_vvv!("lsx", lsx_vmin_b, cs::simd_imin, m128i, i8x16); +impl_vvv!("lsx", lsx_vmin_h, cs::simd_imin, m128i, i16x8); +impl_vvv!("lsx", lsx_vmin_w, cs::simd_imin, m128i, i32x4); +impl_vvv!("lsx", lsx_vmin_d, cs::simd_imin, m128i, i64x2); +impl_vvv!("lsx", lsx_vmin_bu, cs::simd_imin, m128i, u8x16); +impl_vvv!("lsx", lsx_vmin_hu, cs::simd_imin, m128i, u16x8); +impl_vvv!("lsx", lsx_vmin_wu, cs::simd_imin, m128i, u32x4); +impl_vvv!("lsx", lsx_vmin_du, cs::simd_imin, m128i, u64x2); +impl_vvv!("lsx", lsx_vseq_b, is::simd_eq, m128i, i8x16); +impl_vvv!("lsx", lsx_vseq_h, is::simd_eq, m128i, i16x8); +impl_vvv!("lsx", lsx_vseq_w, is::simd_eq, m128i, i32x4); +impl_vvv!("lsx", lsx_vseq_d, is::simd_eq, m128i, i64x2); +impl_vvv!("lsx", lsx_vslt_b, is::simd_lt, m128i, i8x16); +impl_vvv!("lsx", lsx_vslt_h, is::simd_lt, m128i, i16x8); +impl_vvv!("lsx", lsx_vslt_w, is::simd_lt, m128i, i32x4); +impl_vvv!("lsx", lsx_vslt_d, is::simd_lt, m128i, i64x2); +impl_vvv!("lsx", lsx_vslt_bu, is::simd_lt, m128i, u8x16); +impl_vvv!("lsx", lsx_vslt_hu, is::simd_lt, m128i, u16x8); +impl_vvv!("lsx", lsx_vslt_wu, is::simd_lt, m128i, u32x4); +impl_vvv!("lsx", lsx_vslt_du, is::simd_lt, m128i, u64x2); +impl_vvv!("lsx", lsx_vsle_b, is::simd_le, m128i, i8x16); +impl_vvv!("lsx", lsx_vsle_h, is::simd_le, m128i, i16x8); +impl_vvv!("lsx", lsx_vsle_w, is::simd_le, m128i, i32x4); +impl_vvv!("lsx", lsx_vsle_d, is::simd_le, m128i, i64x2); +impl_vvv!("lsx", lsx_vsle_bu, is::simd_le, m128i, u8x16); +impl_vvv!("lsx", lsx_vsle_hu, is::simd_le, m128i, u16x8); +impl_vvv!("lsx", lsx_vsle_wu, is::simd_le, m128i, u32x4); +impl_vvv!("lsx", lsx_vsle_du, is::simd_le, m128i, u64x2); +impl_vvv!("lsx", lsx_vmul_b, is::simd_mul, m128i, i8x16); +impl_vvv!("lsx", lsx_vmul_h, is::simd_mul, m128i, i16x8); +impl_vvv!("lsx", lsx_vmul_w, is::simd_mul, m128i, i32x4); +impl_vvv!("lsx", lsx_vmul_d, is::simd_mul, m128i, i64x2); +impl_vvv!("lsx", lsx_vdiv_b, is::simd_div, m128i, i8x16); +impl_vvv!("lsx", lsx_vdiv_h, is::simd_div, m128i, i16x8); +impl_vvv!("lsx", lsx_vdiv_w, is::simd_div, m128i, i32x4); +impl_vvv!("lsx", lsx_vdiv_d, is::simd_div, m128i, i64x2); +impl_vvv!("lsx", lsx_vdiv_bu, is::simd_div, m128i, u8x16); +impl_vvv!("lsx", lsx_vdiv_hu, is::simd_div, m128i, u16x8); +impl_vvv!("lsx", lsx_vdiv_wu, is::simd_div, m128i, u32x4); +impl_vvv!("lsx", lsx_vdiv_du, is::simd_div, m128i, u64x2); +impl_vvv!("lsx", lsx_vmod_b, is::simd_rem, m128i, i8x16); +impl_vvv!("lsx", lsx_vmod_h, is::simd_rem, m128i, i16x8); +impl_vvv!("lsx", lsx_vmod_w, is::simd_rem, m128i, i32x4); +impl_vvv!("lsx", lsx_vmod_d, is::simd_rem, m128i, i64x2); +impl_vvv!("lsx", lsx_vmod_bu, is::simd_rem, m128i, u8x16); +impl_vvv!("lsx", lsx_vmod_hu, is::simd_rem, m128i, u16x8); +impl_vvv!("lsx", lsx_vmod_wu, is::simd_rem, m128i, u32x4); +impl_vvv!("lsx", lsx_vmod_du, is::simd_rem, m128i, u64x2); +impl_vvv!("lsx", lsx_vand_v, is::simd_and, m128i, u8x16); +impl_vvv!("lsx", lsx_vandn_v, ls::simd_andn, m128i, u8x16); +impl_vvv!("lsx", lsx_vor_v, is::simd_or, m128i, u8x16); +impl_vvv!("lsx", lsx_vorn_v, ls::simd_orn, m128i, u8x16); +impl_vvv!("lsx", lsx_vnor_v, ls::simd_nor, m128i, u8x16); +impl_vvv!("lsx", lsx_vxor_v, is::simd_xor, m128i, u8x16); +impl_vvv!("lsx", lsx_vfadd_s, is::simd_add, m128, f32x4); +impl_vvv!("lsx", lsx_vfadd_d, is::simd_add, m128d, f64x2); +impl_vvv!("lsx", lsx_vfsub_s, is::simd_sub, m128, f32x4); +impl_vvv!("lsx", lsx_vfsub_d, is::simd_sub, m128d, f64x2); +impl_vvv!("lsx", lsx_vfmul_s, is::simd_mul, m128, f32x4); +impl_vvv!("lsx", lsx_vfmul_d, is::simd_mul, m128d, f64x2); +impl_vvv!("lsx", lsx_vfdiv_s, is::simd_div, m128, f32x4); +impl_vvv!("lsx", lsx_vfdiv_d, is::simd_div, m128d, f64x2); +impl_vvv!("lsx", lsx_vsll_b, ls::simd_shl, m128i, i8x16); +impl_vvv!("lsx", lsx_vsll_h, ls::simd_shl, m128i, i16x8); +impl_vvv!("lsx", lsx_vsll_w, ls::simd_shl, m128i, i32x4); +impl_vvv!("lsx", lsx_vsll_d, ls::simd_shl, m128i, i64x2); +impl_vvv!("lsx", lsx_vsra_b, ls::simd_shr, m128i, i8x16); +impl_vvv!("lsx", lsx_vsra_h, ls::simd_shr, m128i, i16x8); +impl_vvv!("lsx", lsx_vsra_w, ls::simd_shr, m128i, i32x4); +impl_vvv!("lsx", lsx_vsra_d, ls::simd_shr, m128i, i64x2); +impl_vvv!("lsx", lsx_vsrl_b, ls::simd_shr, m128i, u8x16); +impl_vvv!("lsx", lsx_vsrl_h, ls::simd_shr, m128i, u16x8); +impl_vvv!("lsx", lsx_vsrl_w, ls::simd_shr, m128i, u32x4); +impl_vvv!("lsx", lsx_vsrl_d, ls::simd_shr, m128i, u64x2); +impl_vvv!("lsx", lsx_vbitclr_b, ls::simd_bitclr, m128i, u8x16); +impl_vvv!("lsx", lsx_vbitclr_h, ls::simd_bitclr, m128i, u16x8); +impl_vvv!("lsx", lsx_vbitclr_w, ls::simd_bitclr, m128i, u32x4); +impl_vvv!("lsx", lsx_vbitclr_d, ls::simd_bitclr, m128i, u64x2); +impl_vvv!("lsx", lsx_vbitset_b, ls::simd_bitset, m128i, u8x16); +impl_vvv!("lsx", lsx_vbitset_h, ls::simd_bitset, m128i, u16x8); +impl_vvv!("lsx", lsx_vbitset_w, ls::simd_bitset, m128i, u32x4); +impl_vvv!("lsx", lsx_vbitset_d, ls::simd_bitset, m128i, u64x2); +impl_vvv!("lsx", lsx_vbitrev_b, ls::simd_bitrev, m128i, u8x16); +impl_vvv!("lsx", lsx_vbitrev_h, ls::simd_bitrev, m128i, u16x8); +impl_vvv!("lsx", lsx_vbitrev_w, ls::simd_bitrev, m128i, u32x4); +impl_vvv!("lsx", lsx_vbitrev_d, ls::simd_bitrev, m128i, u64x2); -impl_vuv!("lsx", lsx_vslli_b, simd_shl, m128i, i8x16); -impl_vuv!("lsx", lsx_vslli_h, simd_shl, m128i, i16x8); -impl_vuv!("lsx", lsx_vslli_w, simd_shl, m128i, i32x4); -impl_vuv!("lsx", lsx_vslli_d, simd_shl, m128i, i64x2); -impl_vuv!("lsx", lsx_vsrai_b, simd_shr, m128i, i8x16); -impl_vuv!("lsx", lsx_vsrai_h, simd_shr, m128i, i16x8); -impl_vuv!("lsx", lsx_vsrai_w, simd_shr, m128i, i32x4); -impl_vuv!("lsx", lsx_vsrai_d, simd_shr, m128i, i64x2); -impl_vuv!("lsx", lsx_vsrli_b, simd_shr, m128i, u8x16); -impl_vuv!("lsx", lsx_vsrli_h, simd_shr, m128i, u16x8); -impl_vuv!("lsx", lsx_vsrli_w, simd_shr, m128i, u32x4); -impl_vuv!("lsx", lsx_vsrli_d, simd_shr, m128i, u64x2); -impl_vuv!("lsx", lsx_vaddi_bu, simd_add, m128i, u8x16, 5); -impl_vuv!("lsx", lsx_vaddi_hu, simd_add, m128i, u16x8, 5); -impl_vuv!("lsx", lsx_vaddi_wu, simd_add, m128i, u32x4, 5); -impl_vuv!("lsx", lsx_vaddi_du, simd_add, m128i, u64x2, 5); -impl_vuv!("lsx", lsx_vslti_bu, simd_lt, m128i, u8x16, 5); -impl_vuv!("lsx", lsx_vslti_hu, simd_lt, m128i, u16x8, 5); -impl_vuv!("lsx", lsx_vslti_wu, simd_lt, m128i, u32x4, 5); -impl_vuv!("lsx", lsx_vslti_du, simd_lt, m128i, u64x2, 5); -impl_vuv!("lsx", lsx_vslei_bu, simd_le, m128i, u8x16, 5); -impl_vuv!("lsx", lsx_vslei_hu, simd_le, m128i, u16x8, 5); -impl_vuv!("lsx", lsx_vslei_wu, simd_le, m128i, u32x4, 5); -impl_vuv!("lsx", lsx_vslei_du, simd_le, m128i, u64x2, 5); -impl_vuv!("lsx", lsx_vmaxi_bu, simd_imax, m128i, u8x16, 5); -impl_vuv!("lsx", lsx_vmaxi_hu, simd_imax, m128i, u16x8, 5); -impl_vuv!("lsx", lsx_vmaxi_wu, simd_imax, m128i, u32x4, 5); -impl_vuv!("lsx", lsx_vmaxi_du, simd_imax, m128i, u64x2, 5); -impl_vuv!("lsx", lsx_vmini_bu, simd_imin, m128i, u8x16, 5); -impl_vuv!("lsx", lsx_vmini_hu, simd_imin, m128i, u16x8, 5); -impl_vuv!("lsx", lsx_vmini_wu, simd_imin, m128i, u32x4, 5); -impl_vuv!("lsx", lsx_vmini_du, simd_imin, m128i, u64x2, 5); +impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16); +impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8); +impl_vuv!("lsx", lsx_vslli_w, is::simd_shl, m128i, i32x4); +impl_vuv!("lsx", lsx_vslli_d, is::simd_shl, m128i, i64x2); +impl_vuv!("lsx", lsx_vsrai_b, is::simd_shr, m128i, i8x16); +impl_vuv!("lsx", lsx_vsrai_h, is::simd_shr, m128i, i16x8); +impl_vuv!("lsx", lsx_vsrai_w, is::simd_shr, m128i, i32x4); +impl_vuv!("lsx", lsx_vsrai_d, is::simd_shr, m128i, i64x2); +impl_vuv!("lsx", lsx_vsrli_b, is::simd_shr, m128i, u8x16); +impl_vuv!("lsx", lsx_vsrli_h, is::simd_shr, m128i, u16x8); +impl_vuv!("lsx", lsx_vsrli_w, is::simd_shr, m128i, u32x4); +impl_vuv!("lsx", lsx_vsrli_d, is::simd_shr, m128i, u64x2); +impl_vuv!("lsx", lsx_vaddi_bu, is::simd_add, m128i, u8x16, 5); +impl_vuv!("lsx", lsx_vaddi_hu, is::simd_add, m128i, u16x8, 5); +impl_vuv!("lsx", lsx_vaddi_wu, is::simd_add, m128i, u32x4, 5); +impl_vuv!("lsx", lsx_vaddi_du, is::simd_add, m128i, u64x2, 5); +impl_vuv!("lsx", lsx_vslti_bu, is::simd_lt, m128i, u8x16, 5); +impl_vuv!("lsx", lsx_vslti_hu, is::simd_lt, m128i, u16x8, 5); +impl_vuv!("lsx", lsx_vslti_wu, is::simd_lt, m128i, u32x4, 5); +impl_vuv!("lsx", lsx_vslti_du, is::simd_lt, m128i, u64x2, 5); +impl_vuv!("lsx", lsx_vslei_bu, is::simd_le, m128i, u8x16, 5); +impl_vuv!("lsx", lsx_vslei_hu, is::simd_le, m128i, u16x8, 5); +impl_vuv!("lsx", lsx_vslei_wu, is::simd_le, m128i, u32x4, 5); +impl_vuv!("lsx", lsx_vslei_du, is::simd_le, m128i, u64x2, 5); +impl_vuv!("lsx", lsx_vmaxi_bu, cs::simd_imax, m128i, u8x16, 5); +impl_vuv!("lsx", lsx_vmaxi_hu, cs::simd_imax, m128i, u16x8, 5); +impl_vuv!("lsx", lsx_vmaxi_wu, cs::simd_imax, m128i, u32x4, 5); +impl_vuv!("lsx", lsx_vmaxi_du, cs::simd_imax, m128i, u64x2, 5); +impl_vuv!("lsx", lsx_vmini_bu, cs::simd_imin, m128i, u8x16, 5); +impl_vuv!("lsx", lsx_vmini_hu, cs::simd_imin, m128i, u16x8, 5); +impl_vuv!("lsx", lsx_vmini_wu, cs::simd_imin, m128i, u32x4, 5); +impl_vuv!("lsx", lsx_vmini_du, cs::simd_imin, m128i, u64x2, 5); -impl_vug!("lsx", lsx_vpickve2gr_b, simd_extract, m128i, i8x16, i32, 4); -impl_vug!("lsx", lsx_vpickve2gr_h, simd_extract, m128i, i16x8, i32, 3); -impl_vug!("lsx", lsx_vpickve2gr_w, simd_extract, m128i, i32x4, i32, 2); -impl_vug!("lsx", lsx_vpickve2gr_d, simd_extract, m128i, i64x2, i64, 1); -impl_vug!("lsx", lsx_vpickve2gr_bu, simd_extract, m128i, u8x16, u32, 4); -impl_vug!("lsx", lsx_vpickve2gr_hu, simd_extract, m128i, u16x8, u32, 3); -impl_vug!("lsx", lsx_vpickve2gr_wu, simd_extract, m128i, u32x4, u32, 2); -impl_vug!("lsx", lsx_vpickve2gr_du, simd_extract, m128i, u64x2, u64, 1); +impl_vug!("lsx", lsx_vpickve2gr_b, is::simd_extract, m128i, i8x16, i32, 4); +impl_vug!("lsx", lsx_vpickve2gr_h, is::simd_extract, m128i, i16x8, i32, 3); +impl_vug!("lsx", lsx_vpickve2gr_w, is::simd_extract, m128i, i32x4, i32, 2); +impl_vug!("lsx", lsx_vpickve2gr_d, is::simd_extract, m128i, i64x2, i64, 1); +impl_vug!("lsx", lsx_vpickve2gr_bu, is::simd_extract, m128i, u8x16, u32, 4); +impl_vug!("lsx", lsx_vpickve2gr_hu, is::simd_extract, m128i, u16x8, u32, 3); +impl_vug!("lsx", lsx_vpickve2gr_wu, is::simd_extract, m128i, u32x4, u32, 2); +impl_vug!("lsx", lsx_vpickve2gr_du, is::simd_extract, m128i, u64x2, u64, 1); -impl_vsv!("lsx", lsx_vseqi_b, simd_eq, m128i, i8x16, 5); -impl_vsv!("lsx", lsx_vseqi_h, simd_eq, m128i, i16x8, 5); -impl_vsv!("lsx", lsx_vseqi_w, simd_eq, m128i, i32x4, 5); -impl_vsv!("lsx", lsx_vseqi_d, simd_eq, m128i, i64x2, 5); -impl_vsv!("lsx", lsx_vslti_b, simd_lt, m128i, i8x16, 5); -impl_vsv!("lsx", lsx_vslti_h, simd_lt, m128i, i16x8, 5); -impl_vsv!("lsx", lsx_vslti_w, simd_lt, m128i, i32x4, 5); -impl_vsv!("lsx", lsx_vslti_d, simd_lt, m128i, i64x2, 5); -impl_vsv!("lsx", lsx_vslei_b, simd_le, m128i, i8x16, 5); -impl_vsv!("lsx", lsx_vslei_h, simd_le, m128i, i16x8, 5); -impl_vsv!("lsx", lsx_vslei_w, simd_le, m128i, i32x4, 5); -impl_vsv!("lsx", lsx_vslei_d, simd_le, m128i, i64x2, 5); -impl_vsv!("lsx", lsx_vmaxi_b, simd_imax, m128i, i8x16, 5); -impl_vsv!("lsx", lsx_vmaxi_h, simd_imax, m128i, i16x8, 5); -impl_vsv!("lsx", lsx_vmaxi_w, simd_imax, m128i, i32x4, 5); -impl_vsv!("lsx", lsx_vmaxi_d, simd_imax, m128i, i64x2, 5); -impl_vsv!("lsx", lsx_vmini_b, simd_imin, m128i, i8x16, 5); -impl_vsv!("lsx", lsx_vmini_h, simd_imin, m128i, i16x8, 5); -impl_vsv!("lsx", lsx_vmini_w, simd_imin, m128i, i32x4, 5); -impl_vsv!("lsx", lsx_vmini_d, simd_imin, m128i, i64x2, 5); +impl_vsv!("lsx", lsx_vseqi_b, is::simd_eq, m128i, i8x16, 5); +impl_vsv!("lsx", lsx_vseqi_h, is::simd_eq, m128i, i16x8, 5); +impl_vsv!("lsx", lsx_vseqi_w, is::simd_eq, m128i, i32x4, 5); +impl_vsv!("lsx", lsx_vseqi_d, is::simd_eq, m128i, i64x2, 5); +impl_vsv!("lsx", lsx_vslti_b, is::simd_lt, m128i, i8x16, 5); +impl_vsv!("lsx", lsx_vslti_h, is::simd_lt, m128i, i16x8, 5); +impl_vsv!("lsx", lsx_vslti_w, is::simd_lt, m128i, i32x4, 5); +impl_vsv!("lsx", lsx_vslti_d, is::simd_lt, m128i, i64x2, 5); +impl_vsv!("lsx", lsx_vslei_b, is::simd_le, m128i, i8x16, 5); +impl_vsv!("lsx", lsx_vslei_h, is::simd_le, m128i, i16x8, 5); +impl_vsv!("lsx", lsx_vslei_w, is::simd_le, m128i, i32x4, 5); +impl_vsv!("lsx", lsx_vslei_d, is::simd_le, m128i, i64x2, 5); +impl_vsv!("lsx", lsx_vmaxi_b, cs::simd_imax, m128i, i8x16, 5); +impl_vsv!("lsx", lsx_vmaxi_h, cs::simd_imax, m128i, i16x8, 5); +impl_vsv!("lsx", lsx_vmaxi_w, cs::simd_imax, m128i, i32x4, 5); +impl_vsv!("lsx", lsx_vmaxi_d, cs::simd_imax, m128i, i64x2, 5); +impl_vsv!("lsx", lsx_vmini_b, cs::simd_imin, m128i, i8x16, 5); +impl_vsv!("lsx", lsx_vmini_h, cs::simd_imin, m128i, i16x8, 5); +impl_vsv!("lsx", lsx_vmini_w, cs::simd_imin, m128i, i32x4, 5); +impl_vsv!("lsx", lsx_vmini_d, cs::simd_imin, m128i, i64x2, 5); -impl_vvvv!("lsx", lsx_vmadd_b, simdl_madd, m128i, i8x16); -impl_vvvv!("lsx", lsx_vmadd_h, simdl_madd, m128i, i16x8); -impl_vvvv!("lsx", lsx_vmadd_w, simdl_madd, m128i, i32x4); -impl_vvvv!("lsx", lsx_vmadd_d, simdl_madd, m128i, i64x2); -impl_vvvv!("lsx", lsx_vmsub_b, simdl_msub, m128i, i8x16); -impl_vvvv!("lsx", lsx_vmsub_h, simdl_msub, m128i, i16x8); -impl_vvvv!("lsx", lsx_vmsub_w, simdl_msub, m128i, i32x4); -impl_vvvv!("lsx", lsx_vmsub_d, simdl_msub, m128i, i64x2); -impl_vvvv!("lsx", lsx_vfmadd_s, simd_fma, m128, f32x4); -impl_vvvv!("lsx", lsx_vfmadd_d, simd_fma, m128d, f64x2); -impl_vvvv!("lsx", lsx_vfmsub_s, simdl_fms, m128, f32x4); -impl_vvvv!("lsx", lsx_vfmsub_d, simdl_fms, m128d, f64x2); -impl_vvvv!("lsx", lsx_vfnmadd_s, simdl_nfma, m128, f32x4); -impl_vvvv!("lsx", lsx_vfnmadd_d, simdl_nfma, m128d, f64x2); -impl_vvvv!("lsx", lsx_vfnmsub_s, simdl_nfms, m128, f32x4); -impl_vvvv!("lsx", lsx_vfnmsub_d, simdl_nfms, m128d, f64x2); +impl_vvvv!("lsx", lsx_vmadd_b, ls::simd_madd, m128i, i8x16); +impl_vvvv!("lsx", lsx_vmadd_h, ls::simd_madd, m128i, i16x8); +impl_vvvv!("lsx", lsx_vmadd_w, ls::simd_madd, m128i, i32x4); +impl_vvvv!("lsx", lsx_vmadd_d, ls::simd_madd, m128i, i64x2); +impl_vvvv!("lsx", lsx_vmsub_b, ls::simd_msub, m128i, i8x16); +impl_vvvv!("lsx", lsx_vmsub_h, ls::simd_msub, m128i, i16x8); +impl_vvvv!("lsx", lsx_vmsub_w, ls::simd_msub, m128i, i32x4); +impl_vvvv!("lsx", lsx_vmsub_d, ls::simd_msub, m128i, i64x2); +impl_vvvv!("lsx", lsx_vfmadd_s, is::simd_fma, m128, f32x4); +impl_vvvv!("lsx", lsx_vfmadd_d, is::simd_fma, m128d, f64x2); +impl_vvvv!("lsx", lsx_vfmsub_s, ls::simd_fmsub, m128, f32x4); +impl_vvvv!("lsx", lsx_vfmsub_d, ls::simd_fmsub, m128d, f64x2); +impl_vvvv!("lsx", lsx_vfnmadd_s, ls::simd_fnmadd, m128, f32x4); +impl_vvvv!("lsx", lsx_vfnmadd_d, ls::simd_fnmadd, m128d, f64x2); +impl_vvvv!("lsx", lsx_vfnmsub_s, ls::simd_fnmsub, m128, f32x4); +impl_vvvv!("lsx", lsx_vfnmsub_d, ls::simd_fnmsub, m128d, f64x2); -impl_vugv!("lsx", lsx_vinsgr2vr_b, simd_insert, m128i, i8x16, i32, 4); -impl_vugv!("lsx", lsx_vinsgr2vr_h, simd_insert, m128i, i16x8, i32, 3); -impl_vugv!("lsx", lsx_vinsgr2vr_w, simd_insert, m128i, i32x4, i32, 2); -impl_vugv!("lsx", lsx_vinsgr2vr_d, simd_insert, m128i, i64x2, i64, 1); +impl_vugv!("lsx", lsx_vinsgr2vr_b, is::simd_insert, m128i, i8x16, i32, 4); +impl_vugv!("lsx", lsx_vinsgr2vr_h, is::simd_insert, m128i, i16x8, i32, 3); +impl_vugv!("lsx", lsx_vinsgr2vr_w, is::simd_insert, m128i, i32x4, i32, 2); +impl_vugv!("lsx", lsx_vinsgr2vr_d, is::simd_insert, m128i, i64x2, i64, 1); diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs index ac98d6ac5d..8d1213bd1b 100644 --- a/crates/core_arch/src/loongarch64/simd.rs +++ b/crates/core_arch/src/loongarch64/simd.rs @@ -1,127 +1,144 @@ //! LoongArch64 SIMD helpers -pub(super) const trait SimdL: Sized { +use self as ls; +use crate::intrinsics::simd as is; + +// Internal extension trait for concrete `Simd` types. +// +// Provides a small set of helper functionality (`Elem` and `splat`) +// so generic and macro-based code can operate on different SIMD +// vector types in a uniform way. +pub(super) const trait SimdExt: Sized { type Elem; unsafe fn splat(v: i64) -> Self; } -macro_rules! impl_simdl { +macro_rules! impl_simd_ext { ($v:ident, $e:ty) => { #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] - impl const SimdL for crate::core_arch::simd::$v { + impl const SimdExt for crate::core_arch::simd::$v { type Elem = $e; #[inline(always)] unsafe fn splat(v: i64) -> Self { - crate::intrinsics::simd::simd_splat(v as Self::Elem) + is::simd_splat(v as Self::Elem) } } }; } -impl_simdl!(i8x16, i8); -impl_simdl!(i8x32, i8); -impl_simdl!(u8x16, u8); -impl_simdl!(u8x32, u8); -impl_simdl!(i16x8, i16); -impl_simdl!(i16x16, i16); -impl_simdl!(u16x8, u16); -impl_simdl!(u16x16, u16); -impl_simdl!(i32x4, i32); -impl_simdl!(i32x8, i32); -impl_simdl!(u32x4, u32); -impl_simdl!(u32x8, u32); -impl_simdl!(i64x2, i64); -impl_simdl!(i64x4, i64); -impl_simdl!(u64x2, u64); -impl_simdl!(u64x4, u64); +impl_simd_ext!(i8x16, i8); +impl_simd_ext!(i8x32, i8); +impl_simd_ext!(u8x16, u8); +impl_simd_ext!(u8x32, u8); +impl_simd_ext!(i16x8, i16); +impl_simd_ext!(i16x16, i16); +impl_simd_ext!(u16x8, u16); +impl_simd_ext!(u16x16, u16); +impl_simd_ext!(i32x4, i32); +impl_simd_ext!(i32x8, i32); +impl_simd_ext!(u32x4, u32); +impl_simd_ext!(u32x8, u32); +impl_simd_ext!(i64x2, i64); +impl_simd_ext!(i64x4, i64); +impl_simd_ext!(u64x2, u64); +impl_simd_ext!(u64x4, u64); #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_andn(a: T, b: T) -> T { - crate::intrinsics::simd::simd_and(simdl_not(a), b) +pub(super) const unsafe fn simd_andn(a: T, b: T) -> T { + is::simd_and(ls::simd_not(a), b) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_fms(a: T, b: T, c: T) -> T { - let c: T = crate::intrinsics::simd::simd_neg(c); - crate::intrinsics::simd::simd_fma(a, b, c) +pub(super) const unsafe fn simd_bitclr(a: T, b: T) -> T { + simd_andn(simd_shl(simd_splat(1), b), a) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_madd(a: T, b: T, c: T) -> T { - let mul: T = crate::intrinsics::simd::simd_mul(b, c); - crate::intrinsics::simd::simd_add(mul, a) +pub(super) const unsafe fn simd_bitrev(a: T, b: T) -> T { + is::simd_xor(simd_shl(simd_splat(1), b), a) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_msub(a: T, b: T, c: T) -> T { - let mul: T = crate::intrinsics::simd::simd_mul(b, c); - crate::intrinsics::simd::simd_sub(a, mul) +pub(super) const unsafe fn simd_bitset(a: T, b: T) -> T { + is::simd_or(simd_shl(simd_splat(1), b), a) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_nfma(a: T, b: T, c: T) -> T { - let fma: T = crate::intrinsics::simd::simd_fma(a, b, c); - crate::intrinsics::simd::simd_neg(fma) +pub(super) const unsafe fn simd_fmsub(a: T, b: T, c: T) -> T { + is::simd_fma(a, b, is::simd_neg(c)) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_nfms(a: T, b: T, c: T) -> T { - let fma: T = simdl_fms(a, b, c); - crate::intrinsics::simd::simd_neg(fma) +pub(super) const unsafe fn simd_fnmadd(a: T, b: T, c: T) -> T { + is::simd_neg(is::simd_fma(a, b, c)) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_nor(a: T, b: T) -> T { - let or: T = crate::intrinsics::simd::simd_or(a, b); - simdl_not(or) +pub(super) const unsafe fn simd_fnmsub(a: T, b: T, c: T) -> T { + is::simd_neg(ls::simd_fmsub(a, b, c)) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_not(a: T) -> T { - let not: T = simdl_splat(!0); - crate::intrinsics::simd::simd_xor(a, not) +pub(super) const unsafe fn simd_madd(a: T, b: T, c: T) -> T { + is::simd_add(a, is::simd_mul(b, c)) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_orn(a: T, b: T) -> T { - crate::intrinsics::simd::simd_or(a, simdl_not(b)) +pub(super) const unsafe fn simd_msub(a: T, b: T, c: T) -> T { + is::simd_sub(a, is::simd_mul(b, c)) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_shl(a: T, b: T) -> T { - let m: T = simdl_splat((size_of::() * 8 - 1) as i64); - let b: T = crate::intrinsics::simd::simd_and(b, m); - crate::intrinsics::simd::simd_shl(a, b) +pub(super) const unsafe fn simd_nor(a: T, b: T) -> T { + ls::simd_not(is::simd_or(a, b)) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_shr(a: T, b: T) -> T { - let m: T = simdl_splat((size_of::() * 8 - 1) as i64); - let b: T = crate::intrinsics::simd::simd_and(b, m); - crate::intrinsics::simd::simd_shr(a, b) +pub(super) const unsafe fn simd_not(a: T) -> T { + is::simd_xor(a, ls::simd_splat(!0)) } #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] -pub(super) const unsafe fn simdl_splat(a: i64) -> T { +pub(super) const unsafe fn simd_orn(a: T, b: T) -> T { + is::simd_or(a, ls::simd_not(b)) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_shl(a: T, b: T) -> T { + let m = (size_of::() * 8 - 1) as i64; + is::simd_shl(a, is::simd_and(b, ls::simd_splat(m))) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_shr(a: T, b: T) -> T { + let m = (size_of::() * 8 - 1) as i64; + is::simd_shr(a, is::simd_and(b, ls::simd_splat(m))) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_splat(a: i64) -> T { T::splat(a) } macro_rules! impl_vv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ty) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ty) => { #[inline(always)] #[target_feature(enable = $ft)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -138,7 +155,7 @@ macro_rules! impl_vv { pub(super) use impl_vv; macro_rules! impl_gv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $gty:ty) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty) => { #[inline(always)] #[target_feature(enable = $ft)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -154,7 +171,7 @@ macro_rules! impl_gv { pub(super) use impl_gv; macro_rules! impl_sv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $ibs:expr) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $ibs:expr) => { #[inline(always)] #[target_feature(enable = $ft)] #[rustc_legacy_const_generics(0)] @@ -172,7 +189,7 @@ macro_rules! impl_sv { pub(super) use impl_sv; macro_rules! impl_vvv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ty) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ty) => { #[inline(always)] #[target_feature(enable = $ft)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -190,22 +207,22 @@ macro_rules! impl_vvv { pub(super) use impl_vvv; macro_rules! impl_vuv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident) => { #[inline(always)] #[target_feature(enable = $ft)] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] pub fn $name(a: $oty) -> $oty { - static_assert_uimm_bits!(IMM, (size_of::<<$ity as SimdL>::Elem>() * 8).ilog2()); + static_assert_uimm_bits!(IMM, (size_of::<<$ity as SimdExt>::Elem>() * 8).ilog2()); unsafe { let a: $ity = transmute(a); - let b: $ity = simdl_splat(IMM.into()); + let b: $ity = ls::simd_splat(IMM.into()); let r: $ity = $op(a, b); transmute(r) } } }; - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $ibs:expr) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $ibs:expr) => { #[inline(always)] #[target_feature(enable = $ft)] #[rustc_legacy_const_generics(1)] @@ -214,7 +231,7 @@ macro_rules! impl_vuv { static_assert_uimm_bits!(IMM, $ibs); unsafe { let a: $ity = transmute(a); - let b: $ity = simdl_splat(IMM.into()); + let b: $ity = ls::simd_splat(IMM.into()); let r: $ity = $op(a, b); transmute(r) } @@ -225,7 +242,7 @@ macro_rules! impl_vuv { pub(super) use impl_vuv; macro_rules! impl_vug { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $gty:ty, $ibs:expr) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty, $ibs:expr) => { #[inline(always)] #[target_feature(enable = $ft)] #[rustc_legacy_const_generics(1)] @@ -234,7 +251,7 @@ macro_rules! impl_vug { static_assert_uimm_bits!(IMM, $ibs); unsafe { let a: $ity = transmute(a); - let r: <$ity as SimdL>::Elem = $op(a, IMM); + let r: <$ity as SimdExt>::Elem = $op(a, IMM); r as $gty } } @@ -244,7 +261,7 @@ macro_rules! impl_vug { pub(super) use impl_vug; macro_rules! impl_vsv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $ibs:expr) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $ibs:expr) => { #[inline(always)] #[target_feature(enable = $ft)] #[rustc_legacy_const_generics(1)] @@ -253,7 +270,7 @@ macro_rules! impl_vsv { static_assert_simm_bits!(IMM, $ibs); unsafe { let a: $ity = transmute(a); - let b: $ity = simdl_splat(IMM.into()); + let b: $ity = ls::simd_splat(IMM.into()); let r: $ity = $op(a, b); transmute(r) } @@ -264,7 +281,7 @@ macro_rules! impl_vsv { pub(super) use impl_vsv; macro_rules! impl_vvvv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ty) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ty) => { #[inline(always)] #[target_feature(enable = $ft)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -283,7 +300,7 @@ macro_rules! impl_vvvv { pub(super) use impl_vvvv; macro_rules! impl_vugv { - ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $gty:ty, $ibs:expr) => { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty, $ibs:expr) => { #[inline(always)] #[target_feature(enable = $ft)] #[rustc_legacy_const_generics(1)] @@ -292,7 +309,7 @@ macro_rules! impl_vugv { static_assert_uimm_bits!(IMM, $ibs); unsafe { let a: $ity = transmute(a); - let r: $ity = $op(a, IMM, b as <$ity as SimdL>::Elem); + let r: $ity = $op(a, IMM, b as <$ity as SimdExt>::Elem); transmute(r) } } diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index 9a9b1a143e..93317fca6b 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -228,21 +228,25 @@ asm-fmts = xd, xj, ui6 data-types = V4DI, V4DI, UQI /// lasx_xvbitclr_b +impl = portable name = lasx_xvbitclr_b asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvbitclr_h +impl = portable name = lasx_xvbitclr_h asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvbitclr_w +impl = portable name = lasx_xvbitclr_w asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvbitclr_d +impl = portable name = lasx_xvbitclr_d asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI @@ -268,21 +272,25 @@ asm-fmts = xd, xj, ui6 data-types = UV4DI, UV4DI, UQI /// lasx_xvbitset_b +impl = portable name = lasx_xvbitset_b asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvbitset_h +impl = portable name = lasx_xvbitset_h asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvbitset_w +impl = portable name = lasx_xvbitset_w asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvbitset_d +impl = portable name = lasx_xvbitset_d asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI @@ -308,21 +316,25 @@ asm-fmts = xd, xj, ui6 data-types = UV4DI, UV4DI, UQI /// lasx_xvbitrev_b +impl = portable name = lasx_xvbitrev_b asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvbitrev_h +impl = portable name = lasx_xvbitrev_h asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvbitrev_w +impl = portable name = lasx_xvbitrev_w asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvbitrev_d +impl = portable name = lasx_xvbitrev_d asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index 8fd2678899..274ebe4144 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -228,21 +228,25 @@ asm-fmts = vd, vj, ui6 data-types = V2DI, V2DI, UQI /// lsx_vbitclr_b +impl = portable name = lsx_vbitclr_b asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vbitclr_h +impl = portable name = lsx_vbitclr_h asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vbitclr_w +impl = portable name = lsx_vbitclr_w asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vbitclr_d +impl = portable name = lsx_vbitclr_d asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI @@ -268,21 +272,25 @@ asm-fmts = vd, vj, ui6 data-types = UV2DI, UV2DI, UQI /// lsx_vbitset_b +impl = portable name = lsx_vbitset_b asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vbitset_h +impl = portable name = lsx_vbitset_h asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vbitset_w +impl = portable name = lsx_vbitset_w asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vbitset_d +impl = portable name = lsx_vbitset_d asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI @@ -308,21 +316,25 @@ asm-fmts = vd, vj, ui6 data-types = UV2DI, UV2DI, UQI /// lsx_vbitrev_b +impl = portable name = lsx_vbitrev_b asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vbitrev_h +impl = portable name = lsx_vbitrev_h asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vbitrev_w +impl = portable name = lsx_vbitrev_w asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vbitrev_d +impl = portable name = lsx_vbitrev_d asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index 2d5e9817a3..f78f594cb9 100644 --- a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -191,6 +191,18 @@ lsx_vrepli_b lsx_vrepli_h lsx_vrepli_w lsx_vrepli_d +lsx_vbitclr_b +lsx_vbitclr_h +lsx_vbitclr_w +lsx_vbitclr_d +lsx_vbitset_b +lsx_vbitset_h +lsx_vbitset_w +lsx_vbitset_d +lsx_vbitrev_b +lsx_vbitrev_h +lsx_vbitrev_w +lsx_vbitrev_d # LASX intrinsics lasx_xvsll_b @@ -379,3 +391,15 @@ lasx_xvrepli_b lasx_xvrepli_h lasx_xvrepli_w lasx_xvrepli_d +lasx_xvbitclr_b +lasx_xvbitclr_h +lasx_xvbitclr_w +lasx_xvbitclr_d +lasx_xvbitset_b +lasx_xvbitset_h +lasx_xvbitset_w +lasx_xvbitset_d +lasx_xvbitrev_b +lasx_xvbitrev_h +lasx_xvbitrev_w +lasx_xvbitrev_d