diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 16f6c58cbb..0916befe04 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -51,102 +51,6 @@
-["AVX512_FP16"]
* [ ] [`_mm256_castpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 91fef37895..4c637f49f3 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -743,3 +743,142 @@ simd_ty!(
x6,
x7
);
+
+// 1024-bit wide types:
+simd_ty!(
+ u16x64[u16]:
+ x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31,
+ x32,
+ x33,
+ x34,
+ x35,
+ x36,
+ x37,
+ x38,
+ x39,
+ x40,
+ x41,
+ x42,
+ x43,
+ x44,
+ x45,
+ x46,
+ x47,
+ x48,
+ x49,
+ x50,
+ x51,
+ x52,
+ x53,
+ x54,
+ x55,
+ x56,
+ x57,
+ x58,
+ x59,
+ x60,
+ x61,
+ x62,
+ x63
+);
+simd_ty!(
+ i32x32[i32]:
+ x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31
+);
+simd_ty!(
+ u32x32[u32]:
+ x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31
+);
diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index dd74d11786..66f6ee1259 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -2,7 +2,7 @@ use crate::{
arch::asm,
core_arch::{simd::*, x86::*},
intrinsics::simd::*,
- mem, ptr,
+ ptr,
};
#[cfg(test)]
@@ -17,11 +17,8 @@ use stdarch_test::assert_instr;
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i {
let a = a.as_i16x32();
- // all-0 is a properly initialized i16x32
- let zero: i16x32 = mem::zeroed();
- let sub = simd_sub(zero, a);
- let cmp: i16x32 = simd_gt(a, zero);
- transmute(simd_select(cmp, a, sub))
+ let cmp: i16x32 = simd_gt(a, i16x32::splat(0));
+ transmute(simd_select(cmp, a, simd_neg(a)))
}
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -108,11 +105,8 @@ pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i {
let a = a.as_i8x64();
- // all-0 is a properly initialized i8x64
- let zero: i8x64 = mem::zeroed();
- let sub = simd_sub(zero, a);
- let cmp: i8x64 = simd_gt(a, zero);
- transmute(simd_select(cmp, a, sub))
+ let cmp: i8x64 = simd_gt(a, i8x64::splat(0));
+ transmute(simd_select(cmp, a, simd_neg(a)))
}
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -370,12 +364,7 @@ pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddusw(
- a.as_u16x32(),
- b.as_u16x32(),
- _mm512_setzero_si512().as_u16x32(),
- 0b11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32()))
}
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -391,7 +380,8 @@ pub unsafe fn _mm512_mask_adds_epu16(
a: __m512i,
b: __m512i,
) -> __m512i {
- transmute(vpaddusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
+ let add = _mm512_adds_epu16(a, b).as_u16x32();
+ transmute(simd_select_bitmask(k, add, src.as_u16x32()))
}
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -402,12 +392,8 @@ pub unsafe fn _mm512_mask_adds_epu16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddusw(
- a.as_u16x32(),
- b.as_u16x32(),
- _mm512_setzero_si512().as_u16x32(),
- k,
- ))
+ let add = _mm512_adds_epu16(a, b).as_u16x32();
+ transmute(simd_select_bitmask(k, add, u16x32::splat(0)))
}
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -423,12 +409,8 @@ pub unsafe fn _mm256_mask_adds_epu16(
a: __m256i,
b: __m256i,
) -> __m256i {
- transmute(vpaddusw256(
- a.as_u16x16(),
- b.as_u16x16(),
- src.as_u16x16(),
- k,
- ))
+ let add = _mm256_adds_epu16(a, b).as_u16x16();
+ transmute(simd_select_bitmask(k, add, src.as_u16x16()))
}
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -439,12 +421,8 @@ pub unsafe fn _mm256_mask_adds_epu16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpaddusw256(
- a.as_u16x16(),
- b.as_u16x16(),
- _mm256_setzero_si256().as_u16x16(),
- k,
- ))
+ let add = _mm256_adds_epu16(a, b).as_u16x16();
+ transmute(simd_select_bitmask(k, add, u16x16::splat(0)))
}
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -455,7 +433,8 @@ pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
+ let add = _mm_adds_epu16(a, b).as_u16x8();
+ transmute(simd_select_bitmask(k, add, src.as_u16x8()))
}
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -466,12 +445,8 @@ pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddusw128(
- a.as_u16x8(),
- b.as_u16x8(),
- _mm_setzero_si128().as_u16x8(),
- k,
- ))
+ let add = _mm_adds_epu16(a, b).as_u16x8();
+ transmute(simd_select_bitmask(k, add, u16x8::splat(0)))
}
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
@@ -482,12 +457,7 @@ pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddusb(
- a.as_u8x64(),
- b.as_u8x64(),
- _mm512_setzero_si512().as_u8x64(),
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64()))
}
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -498,7 +468,8 @@ pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
+ let add = _mm512_adds_epu8(a, b).as_u8x64();
+ transmute(simd_select_bitmask(k, add, src.as_u8x64()))
}
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -509,12 +480,8 @@ pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddusb(
- a.as_u8x64(),
- b.as_u8x64(),
- _mm512_setzero_si512().as_u8x64(),
- k,
- ))
+ let add = _mm512_adds_epu8(a, b).as_u8x64();
+ transmute(simd_select_bitmask(k, add, u8x64::splat(0)))
}
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -525,7 +492,8 @@ pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpaddusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
+ let add = _mm256_adds_epu8(a, b).as_u8x32();
+ transmute(simd_select_bitmask(k, add, src.as_u8x32()))
}
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -536,12 +504,8 @@ pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpaddusb256(
- a.as_u8x32(),
- b.as_u8x32(),
- _mm256_setzero_si256().as_u8x32(),
- k,
- ))
+ let add = _mm256_adds_epu8(a, b).as_u8x32();
+ transmute(simd_select_bitmask(k, add, u8x32::splat(0)))
}
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -552,7 +516,8 @@ pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
+ let add = _mm_adds_epu8(a, b).as_u8x16();
+ transmute(simd_select_bitmask(k, add, src.as_u8x16()))
}
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -563,12 +528,8 @@ pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddusb128(
- a.as_u8x16(),
- b.as_u8x16(),
- _mm_setzero_si128().as_u8x16(),
- k,
- ))
+ let add = _mm_adds_epu8(a, b).as_u8x16();
+ transmute(simd_select_bitmask(k, add, u8x16::splat(0)))
}
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
@@ -579,12 +540,7 @@ pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddsw(
- a.as_i16x32(),
- b.as_i16x32(),
- _mm512_setzero_si512().as_i16x32(),
- 0b11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32()))
}
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -600,7 +556,8 @@ pub unsafe fn _mm512_mask_adds_epi16(
a: __m512i,
b: __m512i,
) -> __m512i {
- transmute(vpaddsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
+ let add = _mm512_adds_epi16(a, b).as_i16x32();
+ transmute(simd_select_bitmask(k, add, src.as_i16x32()))
}
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -611,12 +568,8 @@ pub unsafe fn _mm512_mask_adds_epi16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddsw(
- a.as_i16x32(),
- b.as_i16x32(),
- _mm512_setzero_si512().as_i16x32(),
- k,
- ))
+ let add = _mm512_adds_epi16(a, b).as_i16x32();
+ transmute(simd_select_bitmask(k, add, i16x32::splat(0)))
}
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -632,7 +585,8 @@ pub unsafe fn _mm256_mask_adds_epi16(
a: __m256i,
b: __m256i,
) -> __m256i {
- transmute(vpaddsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
+ let add = _mm256_adds_epi16(a, b).as_i16x16();
+ transmute(simd_select_bitmask(k, add, src.as_i16x16()))
}
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -643,12 +597,8 @@ pub unsafe fn _mm256_mask_adds_epi16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpaddsw256(
- a.as_i16x16(),
- b.as_i16x16(),
- _mm256_setzero_si256().as_i16x16(),
- k,
- ))
+ let add = _mm256_adds_epi16(a, b).as_i16x16();
+ transmute(simd_select_bitmask(k, add, i16x16::splat(0)))
}
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -659,7 +609,8 @@ pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
+ let add = _mm_adds_epi16(a, b).as_i16x8();
+ transmute(simd_select_bitmask(k, add, src.as_i16x8()))
}
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -670,12 +621,8 @@ pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddsw128(
- a.as_i16x8(),
- b.as_i16x8(),
- _mm_setzero_si128().as_i16x8(),
- k,
- ))
+ let add = _mm_adds_epi16(a, b).as_i16x8();
+ transmute(simd_select_bitmask(k, add, i16x8::splat(0)))
}
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
@@ -686,12 +633,7 @@ pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddsb(
- a.as_i8x64(),
- b.as_i8x64(),
- _mm512_setzero_si512().as_i8x64(),
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64()))
}
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -702,7 +644,8 @@ pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
+ let add = _mm512_adds_epi8(a, b).as_i8x64();
+ transmute(simd_select_bitmask(k, add, src.as_i8x64()))
}
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -713,12 +656,8 @@ pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpaddsb(
- a.as_i8x64(),
- b.as_i8x64(),
- _mm512_setzero_si512().as_i8x64(),
- k,
- ))
+ let add = _mm512_adds_epi8(a, b).as_i8x64();
+ transmute(simd_select_bitmask(k, add, i8x64::splat(0)))
}
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -729,7 +668,8 @@ pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpaddsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
+ let add = _mm256_adds_epi8(a, b).as_i8x32();
+ transmute(simd_select_bitmask(k, add, src.as_i8x32()))
}
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -740,12 +680,8 @@ pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpaddsb256(
- a.as_i8x32(),
- b.as_i8x32(),
- _mm256_setzero_si256().as_i8x32(),
- k,
- ))
+ let add = _mm256_adds_epi8(a, b).as_i8x32();
+ transmute(simd_select_bitmask(k, add, i8x32::splat(0)))
}
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -756,7 +692,8 @@ pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
+ let add = _mm_adds_epi8(a, b).as_i8x16();
+ transmute(simd_select_bitmask(k, add, src.as_i8x16()))
}
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -767,12 +704,8 @@ pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpaddsb128(
- a.as_i8x16(),
- b.as_i8x16(),
- _mm_setzero_si128().as_i8x16(),
- k,
- ))
+ let add = _mm_adds_epi8(a, b).as_i8x16();
+ transmute(simd_select_bitmask(k, add, i8x16::splat(0)))
}
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
@@ -955,12 +888,7 @@ pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubusw(
- a.as_u16x32(),
- b.as_u16x32(),
- _mm512_setzero_si512().as_u16x32(),
- 0b11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32()))
}
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -976,7 +904,8 @@ pub unsafe fn _mm512_mask_subs_epu16(
a: __m512i,
b: __m512i,
) -> __m512i {
- transmute(vpsubusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
+ let sub = _mm512_subs_epu16(a, b).as_u16x32();
+ transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
}
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -987,12 +916,8 @@ pub unsafe fn _mm512_mask_subs_epu16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubusw(
- a.as_u16x32(),
- b.as_u16x32(),
- _mm512_setzero_si512().as_u16x32(),
- k,
- ))
+ let sub = _mm512_subs_epu16(a, b).as_u16x32();
+ transmute(simd_select_bitmask(k, sub, u16x32::splat(0)))
}
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1008,12 +933,8 @@ pub unsafe fn _mm256_mask_subs_epu16(
a: __m256i,
b: __m256i,
) -> __m256i {
- transmute(vpsubusw256(
- a.as_u16x16(),
- b.as_u16x16(),
- src.as_u16x16(),
- k,
- ))
+ let sub = _mm256_subs_epu16(a, b).as_u16x16();
+ transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
}
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1024,12 +945,8 @@ pub unsafe fn _mm256_mask_subs_epu16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpsubusw256(
- a.as_u16x16(),
- b.as_u16x16(),
- _mm256_setzero_si256().as_u16x16(),
- k,
- ))
+ let sub = _mm256_subs_epu16(a, b).as_u16x16();
+ transmute(simd_select_bitmask(k, sub, u16x16::splat(0)))
}
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1040,7 +957,8 @@ pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
+ let sub = _mm_subs_epu16(a, b).as_u16x8();
+ transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
}
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1051,12 +969,8 @@ pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubusw128(
- a.as_u16x8(),
- b.as_u16x8(),
- _mm_setzero_si128().as_u16x8(),
- k,
- ))
+ let sub = _mm_subs_epu16(a, b).as_u16x8();
+ transmute(simd_select_bitmask(k, sub, u16x8::splat(0)))
}
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
@@ -1067,12 +981,7 @@ pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubusb(
- a.as_u8x64(),
- b.as_u8x64(),
- _mm512_setzero_si512().as_u8x64(),
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64()))
}
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1083,7 +992,8 @@ pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
+ let sub = _mm512_subs_epu8(a, b).as_u8x64();
+ transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
}
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1094,12 +1004,8 @@ pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubusb(
- a.as_u8x64(),
- b.as_u8x64(),
- _mm512_setzero_si512().as_u8x64(),
- k,
- ))
+ let sub = _mm512_subs_epu8(a, b).as_u8x64();
+ transmute(simd_select_bitmask(k, sub, u8x64::splat(0)))
}
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1110,7 +1016,8 @@ pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpsubusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
+ let sub = _mm256_subs_epu8(a, b).as_u8x32();
+ transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
}
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1121,12 +1028,8 @@ pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpsubusb256(
- a.as_u8x32(),
- b.as_u8x32(),
- _mm256_setzero_si256().as_u8x32(),
- k,
- ))
+ let sub = _mm256_subs_epu8(a, b).as_u8x32();
+ transmute(simd_select_bitmask(k, sub, u8x32::splat(0)))
}
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1137,7 +1040,8 @@ pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
+ let sub = _mm_subs_epu8(a, b).as_u8x16();
+ transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
}
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1148,12 +1052,8 @@ pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubusb128(
- a.as_u8x16(),
- b.as_u8x16(),
- _mm_setzero_si128().as_u8x16(),
- k,
- ))
+ let sub = _mm_subs_epu8(a, b).as_u8x16();
+ transmute(simd_select_bitmask(k, sub, u8x16::splat(0)))
}
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
@@ -1164,12 +1064,7 @@ pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubsw(
- a.as_i16x32(),
- b.as_i16x32(),
- _mm512_setzero_si512().as_i16x32(),
- 0b11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32()))
}
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1185,7 +1080,8 @@ pub unsafe fn _mm512_mask_subs_epi16(
a: __m512i,
b: __m512i,
) -> __m512i {
- transmute(vpsubsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
+ let sub = _mm512_subs_epi16(a, b).as_i16x32();
+ transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
}
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1196,12 +1092,8 @@ pub unsafe fn _mm512_mask_subs_epi16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubsw(
- a.as_i16x32(),
- b.as_i16x32(),
- _mm512_setzero_si512().as_i16x32(),
- k,
- ))
+ let sub = _mm512_subs_epi16(a, b).as_i16x32();
+ transmute(simd_select_bitmask(k, sub, i16x32::splat(0)))
}
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1217,7 +1109,8 @@ pub unsafe fn _mm256_mask_subs_epi16(
a: __m256i,
b: __m256i,
) -> __m256i {
- transmute(vpsubsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
+ let sub = _mm256_subs_epi16(a, b).as_i16x16();
+ transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
}
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1228,12 +1121,8 @@ pub unsafe fn _mm256_mask_subs_epi16(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpsubsw256(
- a.as_i16x16(),
- b.as_i16x16(),
- _mm256_setzero_si256().as_i16x16(),
- k,
- ))
+ let sub = _mm256_subs_epi16(a, b).as_i16x16();
+ transmute(simd_select_bitmask(k, sub, i16x16::splat(0)))
}
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1244,7 +1133,8 @@ pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
+ let sub = _mm_subs_epi16(a, b).as_i16x8();
+ transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
}
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1255,12 +1145,8 @@ pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubsw128(
- a.as_i16x8(),
- b.as_i16x8(),
- _mm_setzero_si128().as_i16x8(),
- k,
- ))
+ let sub = _mm_subs_epi16(a, b).as_i16x8();
+ transmute(simd_select_bitmask(k, sub, i16x8::splat(0)))
}
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
@@ -1271,12 +1157,7 @@ pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubsb(
- a.as_i8x64(),
- b.as_i8x64(),
- _mm512_setzero_si512().as_i8x64(),
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
- ))
+ transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64()))
}
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1287,7 +1168,8 @@ pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
+ let sub = _mm512_subs_epi8(a, b).as_i8x64();
+ transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
}
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1298,12 +1180,8 @@ pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
- transmute(vpsubsb(
- a.as_i8x64(),
- b.as_i8x64(),
- _mm512_setzero_si512().as_i8x64(),
- k,
- ))
+ let sub = _mm512_subs_epi8(a, b).as_i8x64();
+ transmute(simd_select_bitmask(k, sub, i8x64::splat(0)))
}
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1314,7 +1192,8 @@ pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpsubsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
+ let sub = _mm256_subs_epi8(a, b).as_i8x32();
+ transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
}
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1325,12 +1204,8 @@ pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
- transmute(vpsubsb256(
- a.as_i8x32(),
- b.as_i8x32(),
- _mm256_setzero_si256().as_i8x32(),
- k,
- ))
+ let sub = _mm256_subs_epi8(a, b).as_i8x32();
+ transmute(simd_select_bitmask(k, sub, i8x32::splat(0)))
}
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1341,7 +1216,8 @@ pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
+ let sub = _mm_subs_epi8(a, b).as_i8x16();
+ transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
}
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -1352,12 +1228,8 @@ pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
- transmute(vpsubsb128(
- a.as_i8x16(),
- b.as_i8x16(),
- _mm_setzero_si128().as_i8x16(),
- k,
- ))
+ let sub = _mm_subs_epi8(a, b).as_i8x16();
+ transmute(simd_select_bitmask(k, sub, i8x16::splat(0)))
}
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
@@ -1368,7 +1240,10 @@ pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmulhuw(a.as_u16x32(), b.as_u16x32()))
+ let a = simd_cast::<_, u32x32>(a.as_u16x32());
+ let b = simd_cast::<_, u32x32>(b.as_u16x32());
+ let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
+ transmute(simd_cast::<u32x32, u16x32>(r))
}
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1464,7 +1339,10 @@ pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmulhw(a.as_i16x32(), b.as_i16x32()))
+ let a = simd_cast::<_, i32x32>(a.as_i16x32());
+ let b = simd_cast::<_, i32x32>(b.as_i16x32());
+ let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
+ transmute(simd_cast::<i32x32, i16x32>(r))
}
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1752,7 +1630,9 @@ pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxuw(a.as_u16x32(), b.as_u16x32()))
+ let a = a.as_u16x32();
+ let b = b.as_u16x32();
+ transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
}
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1838,7 +1718,9 @@ pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxub(a.as_u8x64(), b.as_u8x64()))
+ let a = a.as_u8x64();
+ let b = b.as_u8x64();
+ transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
}
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1924,7 +1806,9 @@ pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxsw(a.as_i16x32(), b.as_i16x32()))
+ let a = a.as_i16x32();
+ let b = b.as_i16x32();
+ transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
}
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2010,7 +1894,9 @@ pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxsb(a.as_i8x64(), b.as_i8x64()))
+ let a = a.as_i8x64();
+ let b = b.as_i8x64();
+ transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
}
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2096,7 +1982,9 @@ pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminuw(a.as_u16x32(), b.as_u16x32()))
+ let a = a.as_u16x32();
+ let b = b.as_u16x32();
+ transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
}
/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2182,7 +2070,9 @@ pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminub(a.as_u8x64(), b.as_u8x64()))
+ let a = a.as_u8x64();
+ let b = b.as_u8x64();
+ transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
}
/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2268,7 +2158,9 @@ pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminsw(a.as_i16x32(), b.as_i16x32()))
+ let a = a.as_i16x32();
+ let b = b.as_i16x32();
+ transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
}
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2354,7 +2246,9 @@ pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminsb(a.as_i8x64(), b.as_i8x64()))
+ let a = a.as_i8x64();
+ let b = b.as_i8x64();
+ transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
}
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2451,7 +2345,7 @@ pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmplt_epu16_mask(a, b) & k1
+ _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2473,7 +2367,7 @@ pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmplt_epu16_mask(a, b) & k1
+ _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2495,7 +2389,7 @@ pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmplt_epu16_mask(a, b) & k1
+ _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2517,7 +2411,7 @@ pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmplt_epu8_mask(a, b) & k1
+ _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2539,7 +2433,7 @@ pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmplt_epu8_mask(a, b) & k1
+ _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2561,7 +2455,7 @@ pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmplt_epu8_mask(a, b) & k1
+ _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2583,7 +2477,7 @@ pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmplt_epi16_mask(a, b) & k1
+ _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2605,7 +2499,7 @@ pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmplt_epi16_mask(a, b) & k1
+ _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2627,7 +2521,7 @@ pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmplt_epi16_mask(a, b) & k1
+ _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2649,7 +2543,7 @@ pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmplt_epi8_mask(a, b) & k1
+ _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2671,7 +2565,7 @@ pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmplt_epi8_mask(a, b) & k1
+ _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -2693,7 +2587,7 @@ pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmplt_epi8_mask(a, b) & k1
+ _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2715,7 +2609,7 @@ pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpgt_epu16_mask(a, b) & k1
+ _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2737,7 +2631,7 @@ pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpgt_epu16_mask(a, b) & k1
+ _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2759,7 +2653,7 @@ pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpgt_epu16_mask(a, b) & k1
+ _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2781,7 +2675,7 @@ pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpgt_epu8_mask(a, b) & k1
+ _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2803,7 +2697,7 @@ pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpgt_epu8_mask(a, b) & k1
+ _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2825,7 +2719,7 @@ pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpgt_epu8_mask(a, b) & k1
+ _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2847,7 +2741,7 @@ pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpgt_epi16_mask(a, b) & k1
+ _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2869,7 +2763,7 @@ pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpgt_epi16_mask(a, b) & k1
+ _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2891,7 +2785,7 @@ pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpgt_epi16_mask(a, b) & k1
+ _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2913,7 +2807,7 @@ pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpgt_epi8_mask(a, b) & k1
+ _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2935,7 +2829,7 @@ pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpgt_epi8_mask(a, b) & k1
+ _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -2957,7 +2851,7 @@ pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpgt_epi8_mask(a, b) & k1
+ _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -2979,7 +2873,7 @@ pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmple_epu16_mask(a, b) & k1
+ _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3001,7 +2895,7 @@ pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmple_epu16_mask(a, b) & k1
+ _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3023,7 +2917,7 @@ pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmple_epu16_mask(a, b) & k1
+ _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3045,7 +2939,7 @@ pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmple_epu8_mask(a, b) & k1
+ _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3067,7 +2961,7 @@ pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmple_epu8_mask(a, b) & k1
+ _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3089,7 +2983,7 @@ pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmple_epu8_mask(a, b) & k1
+ _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3111,7 +3005,7 @@ pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmple_epi16_mask(a, b) & k1
+ _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3133,7 +3027,7 @@ pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmple_epi16_mask(a, b) & k1
+ _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3155,7 +3049,7 @@ pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmple_epi16_mask(a, b) & k1
+ _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3177,7 +3071,7 @@ pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmple_epi8_mask(a, b) & k1
+ _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3199,7 +3093,7 @@ pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmple_epi8_mask(a, b) & k1
+ _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -3221,7 +3115,7 @@ pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmple_epi8_mask(a, b) & k1
+ _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3243,7 +3137,7 @@ pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpge_epu16_mask(a, b) & k1
+ _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3265,7 +3159,7 @@ pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpge_epu16_mask(a, b) & k1
+ _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3287,7 +3181,7 @@ pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpge_epu16_mask(a, b) & k1
+ _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3309,7 +3203,7 @@ pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpge_epu8_mask(a, b) & k1
+ _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3331,7 +3225,7 @@ pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpge_epu8_mask(a, b) & k1
+ _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3353,7 +3247,7 @@ pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpge_epu8_mask(a, b) & k1
+ _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3375,7 +3269,7 @@ pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpge_epi16_mask(a, b) & k1
+ _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3397,7 +3291,7 @@ pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpge_epi16_mask(a, b) & k1
+ _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3419,7 +3313,7 @@ pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpge_epi16_mask(a, b) & k1
+ _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3441,7 +3335,7 @@ pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpge_epi8_mask(a, b) & k1
+ _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3463,7 +3357,7 @@ pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpge_epi8_mask(a, b) & k1
+ _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -3485,7 +3379,7 @@ pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpge_epi8_mask(a, b) & k1
+ _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3507,7 +3401,7 @@ pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpeq_epu16_mask(a, b) & k1
+ _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3529,7 +3423,7 @@ pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpeq_epu16_mask(a, b) & k1
+ _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3551,7 +3445,7 @@ pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpeq_epu16_mask(a, b) & k1
+ _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3573,7 +3467,7 @@ pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpeq_epu8_mask(a, b) & k1
+ _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3595,7 +3489,7 @@ pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpeq_epu8_mask(a, b) & k1
+ _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3617,7 +3511,7 @@ pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpeq_epu8_mask(a, b) & k1
+ _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3639,7 +3533,7 @@ pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpeq_epi16_mask(a, b) & k1
+ _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3661,7 +3555,7 @@ pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpeq_epi16_mask(a, b) & k1
+ _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3683,7 +3577,7 @@ pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpeq_epi16_mask(a, b) & k1
+ _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3705,7 +3599,7 @@ pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpeq_epi8_mask(a, b) & k1
+ _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3727,7 +3621,7 @@ pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpeq_epi8_mask(a, b) & k1
+ _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
@@ -3749,7 +3643,7 @@ pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpeq_epi8_mask(a, b) & k1
+ _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3771,7 +3665,7 @@ pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpneq_epu16_mask(a, b) & k1
+ _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3793,7 +3687,7 @@ pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpneq_epu16_mask(a, b) & k1
+ _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3815,7 +3709,7 @@ pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpneq_epu16_mask(a, b) & k1
+ _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3837,7 +3731,7 @@ pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpneq_epu8_mask(a, b) & k1
+ _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3859,7 +3753,7 @@ pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpneq_epu8_mask(a, b) & k1
+ _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3881,7 +3775,7 @@ pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpneq_epu8_mask(a, b) & k1
+ _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3903,7 +3797,7 @@ pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
- _mm512_cmpneq_epi16_mask(a, b) & k1
+ _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3925,7 +3819,7 @@ pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
- _mm256_cmpneq_epi16_mask(a, b) & k1
+ _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3947,7 +3841,7 @@ pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpneq_epi16_mask(a, b) & k1
+ _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3969,7 +3863,7 @@ pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
- _mm512_cmpneq_epi8_mask(a, b) & k1
+ _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -3991,7 +3885,7 @@ pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
- _mm256_cmpneq_epi8_mask(a, b) & k1
+ _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -4013,7 +3907,7 @@ pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
- _mm_cmpneq_epi8_mask(a, b) & k1
+ _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
@@ -4028,7 +3922,17 @@ pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x32();
let b = b.as_u16x32();
- vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i16x32::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i16x32::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4047,7 +3951,18 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x32();
let b = b.as_u16x32();
- vpcmpuw(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i16x32::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4062,7 +3977,17 @@ pub unsafe fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x16();
let b = b.as_u16x16();
- vpcmpuw256(a, b, IMM8, 0b11111111_11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i16x16::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i16x16::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4081,7 +4006,18 @@ pub unsafe fn _mm256_mask_cmp_epu16_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x16();
let b = b.as_u16x16();
- vpcmpuw256(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i16x16::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4096,7 +4032,17 @@ pub unsafe fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __m
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x8();
let b = b.as_u16x8();
- vpcmpuw128(a, b, IMM8, 0b11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i16x8::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i16x8::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4115,7 +4061,18 @@ pub unsafe fn _mm_mask_cmp_epu16_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x8();
let b = b.as_u16x8();
- vpcmpuw128(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i16x8::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4130,12 +4087,17 @@ pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x64();
let b = b.as_u8x64();
- vpcmpub(
- a,
- b,
- IMM8,
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
- )
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i8x64::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i8x64::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4154,7 +4116,18 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x64();
let b = b.as_u8x64();
- vpcmpub(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i8x64::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4169,7 +4142,17 @@ pub unsafe fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x32();
let b = b.as_u8x32();
- vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i8x32::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i8x32::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4188,7 +4171,18 @@ pub unsafe fn _mm256_mask_cmp_epu8_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x32();
let b = b.as_u8x32();
- vpcmpub256(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i8x32::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4203,7 +4197,17 @@ pub unsafe fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mm
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x16();
let b = b.as_u8x16();
- vpcmpub128(a, b, IMM8, 0b11111111_11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i8x16::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i8x16::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4222,7 +4226,18 @@ pub unsafe fn _mm_mask_cmp_epu8_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x16();
let b = b.as_u8x16();
- vpcmpub128(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i8x16::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4237,7 +4252,17 @@ pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x32();
let b = b.as_i16x32();
- vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i16x32::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i16x32::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4256,7 +4281,18 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x32();
let b = b.as_i16x32();
- vpcmpw(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i16x32::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4271,7 +4307,17 @@ pub unsafe fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x16();
let b = b.as_i16x16();
- vpcmpw256(a, b, IMM8, 0b11111111_11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i16x16::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i16x16::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4290,7 +4336,18 @@ pub unsafe fn _mm256_mask_cmp_epi16_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x16();
let b = b.as_i16x16();
- vpcmpw256(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i16x16::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4305,7 +4362,17 @@ pub unsafe fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __m
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x8();
let b = b.as_i16x8();
- vpcmpw128(a, b, IMM8, 0b11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i16x8::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i16x8::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4324,7 +4391,18 @@ pub unsafe fn _mm_mask_cmp_epi16_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x8();
let b = b.as_i16x8();
- vpcmpw128(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i16x8::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4339,12 +4417,17 @@ pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x64();
let b = b.as_i8x64();
- vpcmpb(
- a,
- b,
- IMM8,
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
- )
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i8x64::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i8x64::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4363,7 +4446,18 @@ pub unsafe fn _mm512_mask_cmp_epi8_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x64();
let b = b.as_i8x64();
- vpcmpb(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::splat(0));
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i8x64::splat(0), // _MM_CMPINT_FALSE: no lane is ever set
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // _MM_CMPINT_TRUE: result is exactly the zeromask
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4378,7 +4472,17 @@ pub unsafe fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x32();
let b = b.as_i8x32();
- vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
+ let r = match IMM8 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i8x32::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i8x32::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4397,7 +4501,18 @@ pub unsafe fn _mm256_mask_cmp_epi8_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x32();
let b = b.as_i8x32();
- vpcmpb256(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::splat(0)); // expand k1 into a per-lane all-ones/all-zeros vector
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i8x32::splat(0), // always-false predicate: no lane is set, whatever k1 is
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // always-true predicate is still zero-masked, so the result is exactly k1 (not all-ones)
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -4412,7 +4527,17 @@ pub unsafe fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mm
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x16();
let b = b.as_i8x16();
- vpcmpb128(a, b, IMM8, 0b11111111_11111111)
+ let r = match IMM8 { // IMM8 selects which lane-wise comparison is performed
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i8x16::splat(0), // always-false predicate: no lane is set
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i8x16::splat(-1), // remaining value (presumably only 7, given the 3-bit assert): every lane is set
+ };
+ simd_bitmask(r) // turn the lane-wise result vector into the returned bitmask
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -4431,139 +4556,838 @@ pub unsafe fn _mm_mask_cmp_epi8_mask(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x16();
let b = b.as_i8x16();
- vpcmpb128(a, b, IMM8, k1)
+ let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::splat(0)); // expand k1 into a per-lane all-ones/all-zeros vector
+ let r = match IMM8 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i8x16::splat(0), // always-false predicate: no lane is set, whatever k1 is
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1, // always-true predicate is still zero-masked, so the result is exactly k1 (not all-ones)
+ };
+ simd_bitmask(r)
}
-/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 16-bit integers in a by addition. Returns the sum of all 16 lanes of a as an i16.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
#[inline]
-#[target_feature(enable = "avx512bw")]
+#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
- ptr::read_unaligned(mem_addr as *const __m512i)
+pub unsafe fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
+ simd_reduce_add_unordered(a.as_i16x16())
}
-/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 16-bit integers in a by addition using mask k. Lanes whose bit in k is clear are replaced by 0 before summing, so only the active lanes contribute and an all-zero mask yields 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
- ptr::read_unaligned(mem_addr as *const __m256i)
+pub unsafe fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
+ simd_reduce_add_unordered(simd_select_bitmask(
+ k,
+ a.as_i16x16(),
+ _mm256_setzero_si256().as_i16x16(),
+ ))
}
-/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 16-bit integers in a by addition. Returns the sum of all 8 lanes of a as an i16.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
- ptr::read_unaligned(mem_addr as *const __m128i)
+pub unsafe fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
+ simd_reduce_add_unordered(a.as_i16x8())
}
-/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 16-bit integers in a by addition using mask k. Lanes whose bit in k is clear are replaced by 0 before summing, so only the active lanes contribute and an all-zero mask yields 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
#[inline]
-#[target_feature(enable = "avx512bw")]
+#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
- ptr::read_unaligned(mem_addr as *const __m512i)
+pub unsafe fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
+ simd_reduce_add_unordered(simd_select_bitmask(
+ k,
+ a.as_i16x8(),
+ _mm_setzero_si128().as_i16x8(),
+ ))
}
-/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 8-bit integers in a by addition. Returns the sum of all 32 lanes of a as an i8.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
- ptr::read_unaligned(mem_addr as *const __m256i)
+pub unsafe fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
+ simd_reduce_add_unordered(a.as_i8x32())
}
-/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 8-bit integers in a by addition using mask k. Lanes whose bit in k is clear are replaced by 0 before summing, so only the active lanes contribute and an all-zero mask yields 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
- ptr::read_unaligned(mem_addr as *const __m128i)
+pub unsafe fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
+ simd_reduce_add_unordered(simd_select_bitmask(
+ k,
+ a.as_i8x32(),
+ _mm256_setzero_si256().as_i8x32(),
+ ))
}
-/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 8-bit integers in a by addition. Returns the sum of all 16 lanes of a as an i8.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
#[inline]
-#[target_feature(enable = "avx512bw")]
+#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
- ptr::write_unaligned(mem_addr as *mut __m512i, a);
+pub unsafe fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
+ simd_reduce_add_unordered(a.as_i8x16())
}
-/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed signed 8-bit integers in a by addition using mask k. Lanes whose bit in k is clear are replaced by 0 before summing, so only the active lanes contribute and an all-zero mask yields 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
- ptr::write_unaligned(mem_addr as *mut __m256i, a);
+pub unsafe fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
+ simd_reduce_add_unordered(simd_select_bitmask(
+ k,
+ a.as_i8x16(),
+ _mm_setzero_si128().as_i8x16(),
+ ))
}
-/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all 16 lanes of a as an i16.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
- ptr::write_unaligned(mem_addr as *mut __m128i, a);
+pub unsafe fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
+ simd_reduce_and(a.as_i16x16())
}
-/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Lanes whose bit in k is clear are replaced by all-ones (-1) before reducing, so only the active lanes can clear bits and an all-zero mask yields -1.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
#[inline]
-#[target_feature(enable = "avx512bw")]
+#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
- ptr::write_unaligned(mem_addr as *mut __m512i, a);
+pub unsafe fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
+ simd_reduce_and(simd_select_bitmask(
+ k,
+ a.as_i16x16(),
+ _mm256_set1_epi64x(-1).as_i16x16(),
+ ))
}
-/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all 8 lanes of a as an i16.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
- ptr::write_unaligned(mem_addr as *mut __m256i, a);
+pub unsafe fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
+ simd_reduce_and(a.as_i16x8())
}
-/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
+/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Lanes whose bit in k is clear are replaced by all-ones (-1) before reducing, so only the active lanes can clear bits and an all-zero mask yields -1.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
- ptr::write_unaligned(mem_addr as *mut __m128i, a);
+pub unsafe fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
+ simd_reduce_and(simd_select_bitmask(
+ k,
+ a.as_i16x8(),
+ _mm_set1_epi64x(-1).as_i16x8(),
+ ))
+}
+
+/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all 32 lanes of a as an i8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
+ simd_reduce_and(a.as_i8x32())
+}
+
+/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Lanes whose bit in k is clear are replaced by all-ones (-1) before reducing, so only the active lanes can clear bits and an all-zero mask yields -1.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
+ simd_reduce_and(simd_select_bitmask(
+ k,
+ a.as_i8x32(),
+ _mm256_set1_epi64x(-1).as_i8x32(),
+ ))
+}
+
+/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all 16 lanes of a as an i8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
+ simd_reduce_and(a.as_i8x16())
+}
+
+/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Lanes whose bit in k is clear are replaced by all-ones (-1) before reducing, so only the active lanes can clear bits and an all-zero mask yields -1.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
+ simd_reduce_and(simd_select_bitmask(
+ k,
+ a.as_i8x16(),
+ _mm_set1_epi64x(-1).as_i8x16(),
+ ))
+}
+
+/// Reduce the packed signed 16-bit integers in a by maximum. Returns the largest of the 16 lanes of a as an i16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
+ simd_reduce_max(a.as_i16x16())
+}
+
+/// Reduce the packed signed 16-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by -32768 (i16::MIN) before reducing, so only the active lanes can raise the result and an all-zero mask yields -32768.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768)))
+}
+
+/// Reduce the packed signed 16-bit integers in a by maximum. Returns the largest of the 8 lanes of a as an i16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
+ simd_reduce_max(a.as_i16x8())
+}
+
+/// Reduce the packed signed 16-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by -32768 (i16::MIN) before reducing, so only the active lanes can raise the result and an all-zero mask yields -32768.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768)))
+}
+
+/// Reduce the packed signed 8-bit integers in a by maximum. Returns the largest of the 32 lanes of a as an i8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
+ simd_reduce_max(a.as_i8x32())
+}
+
+/// Reduce the packed signed 8-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by -128 (i8::MIN) before reducing, so only the active lanes can raise the result and an all-zero mask yields -128.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128)))
+}
+
+/// Reduce the packed signed 8-bit integers in a by maximum. Returns the largest of the 16 lanes of a as an i8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
+ simd_reduce_max(a.as_i8x16())
+}
+
+/// Reduce the packed signed 8-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by -128 (i8::MIN) before reducing, so only the active lanes can raise the result and an all-zero mask yields -128.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128)))
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the largest of the 16 lanes of a as a u16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
+ simd_reduce_max(a.as_u16x16())
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by 0 before reducing, so only the active lanes can raise the result and an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0)))
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the largest of the 8 lanes of a as a u16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
+ simd_reduce_max(a.as_u16x8())
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by 0 before reducing, so only the active lanes can raise the result and an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0)))
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the largest of the 32 lanes of a as a u8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
+ simd_reduce_max(a.as_u8x32())
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by 0 before reducing, so only the active lanes can raise the result and an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0)))
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the largest of the 16 lanes of a as a u8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
+ simd_reduce_max(a.as_u8x16())
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Lanes whose bit in k is clear are replaced by 0 before reducing, so only the active lanes can raise the result and an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
+ simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0)))
+}
+
+/// Reduce the packed signed 16-bit integers in a by minimum. Returns the smallest of the 16 lanes of a as an i16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
+ simd_reduce_min(a.as_i16x16())
+}
+
+/// Reduce the packed signed 16-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0x7fff (i16::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0x7fff.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff)))
+}
+
+/// Reduce the packed signed 16-bit integers in a by minimum. Returns the smallest of the 8 lanes of a as an i16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
+ simd_reduce_min(a.as_i16x8())
+}
+
+/// Reduce the packed signed 16-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0x7fff (i16::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0x7fff.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff)))
+}
+
+/// Reduce the packed signed 8-bit integers in a by minimum. Returns the smallest of the 32 lanes of a as an i8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
+ simd_reduce_min(a.as_i8x32())
+}
+
+/// Reduce the packed signed 8-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0x7f (i8::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0x7f.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f)))
+}
+
+/// Reduce the packed signed 8-bit integers in a by minimum. Returns the smallest of the 16 lanes of a as an i8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
+ simd_reduce_min(a.as_i8x16())
+}
+
+/// Reduce the packed signed 8-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0x7f (i8::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0x7f.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f)))
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the smallest of the 16 lanes of a as a u16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
+ simd_reduce_min(a.as_u16x16())
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0xffff (u16::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0xffff.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff)))
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the smallest of the 8 lanes of a as a u16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
+ simd_reduce_min(a.as_u16x8())
+}
+
+/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0xffff (u16::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0xffff.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff)))
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the smallest of the 32 lanes of a as a u8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
+ simd_reduce_min(a.as_u8x32())
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0xff (u8::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0xff.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff)))
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the smallest of the 16 lanes of a as a u8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
+ simd_reduce_min(a.as_u8x16())
+}
+
+/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Lanes whose bit in k is clear are replaced by 0xff (u8::MAX) before reducing, so only the active lanes can lower the result and an all-zero mask yields 0xff.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
+ simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff)))
+}
+
+/// Reduce the packed signed 16-bit integers in a by multiplication. Returns the product of all 16 lanes of a as an i16.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
+ simd_reduce_mul_unordered(a.as_i16x16())
+}
+
+/// Reduce the packed 16-bit integers in `a` by multiplication using mask `k`. Returns the product of all active elements in `a`; inactive elements are replaced by 1 before the reduction, so an all-zero mask yields 1.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
+ simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)))
+}
+
+/// Reduce the packed 16-bit integers in `a` by multiplication. Returns the product of all 8 elements in `a`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
+ simd_reduce_mul_unordered(a.as_i16x8())
+}
+
+/// Reduce the packed 16-bit integers in `a` by multiplication using mask `k`. Returns the product of all active elements in `a`; inactive elements are replaced by 1 before the reduction, so an all-zero mask yields 1.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
+ simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)))
+}
+
+/// Reduce the packed 8-bit integers in `a` by multiplication. Returns the product of all 32 elements in `a`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
+ simd_reduce_mul_unordered(a.as_i8x32())
+}
+
+/// Reduce the packed 8-bit integers in `a` by multiplication using mask `k`. Returns the product of all active elements in `a`; inactive elements are replaced by 1 before the reduction, so an all-zero mask yields 1.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
+ simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)))
+}
+
+/// Reduce the packed 8-bit integers in `a` by multiplication. Returns the product of all 16 elements in `a`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
+ simd_reduce_mul_unordered(a.as_i8x16())
+}
+
+/// Reduce the packed 8-bit integers in `a` by multiplication using mask `k`. Returns the product of all active elements in `a`; inactive elements are replaced by 1 before the reduction, so an all-zero mask yields 1.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
+ simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)))
+}
+
+/// Reduce the packed 16-bit integers in `a` by bitwise OR. Returns the bitwise OR of all 16 elements in `a`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
+ simd_reduce_or(a.as_i16x16())
+}
+
+/// Reduce the packed 16-bit integers in `a` by bitwise OR using mask `k`. Returns the bitwise OR of all active elements in `a`; inactive elements are replaced by 0 before the reduction, so an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
+ simd_reduce_or(simd_select_bitmask(
+ k,
+ a.as_i16x16(),
+ _mm256_setzero_si256().as_i16x16(),
+ ))
+}
+
+/// Reduce the packed 16-bit integers in `a` by bitwise OR. Returns the bitwise OR of all 8 elements in `a`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
+ simd_reduce_or(a.as_i16x8())
+}
+
+/// Reduce the packed 16-bit integers in `a` by bitwise OR using mask `k`. Returns the bitwise OR of all active elements in `a`; inactive elements are replaced by 0 before the reduction, so an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
+ simd_reduce_or(simd_select_bitmask(
+ k,
+ a.as_i16x8(),
+ _mm_setzero_si128().as_i16x8(),
+ ))
+}
+
+/// Reduce the packed 8-bit integers in `a` by bitwise OR. Returns the bitwise OR of all 32 elements in `a`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
+ simd_reduce_or(a.as_i8x32())
+}
+
+/// Reduce the packed 8-bit integers in `a` by bitwise OR using mask `k`. Returns the bitwise OR of all active elements in `a`; inactive elements are replaced by 0 before the reduction, so an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
+ simd_reduce_or(simd_select_bitmask(
+ k,
+ a.as_i8x32(),
+ _mm256_setzero_si256().as_i8x32(),
+ ))
+}
+
+/// Reduce the packed 8-bit integers in `a` by bitwise OR. Returns the bitwise OR of all 16 elements in `a`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
+ simd_reduce_or(a.as_i8x16())
+}
+
+/// Reduce the packed 8-bit integers in `a` by bitwise OR using mask `k`. Returns the bitwise OR of all active elements in `a`; inactive elements are replaced by 0 before the reduction, so an all-zero mask yields 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
+ simd_reduce_or(simd_select_bitmask(
+ k,
+ a.as_i8x16(),
+ _mm_setzero_si128().as_i8x16(),
+ ))
+}
+
+/// Load 512-bits (composed of 32 packed 16-bit integers) from memory and return them. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for reading 64 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
+ ptr::read_unaligned(mem_addr as *const __m512i)
+}
+
+/// Load 256-bits (composed of 16 packed 16-bit integers) from memory and return them. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for reading 32 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
+ ptr::read_unaligned(mem_addr as *const __m256i)
+}
+
+/// Load 128-bits (composed of 8 packed 16-bit integers) from memory and return them. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for reading 16 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
+ ptr::read_unaligned(mem_addr as *const __m128i)
+}
+
+/// Load 512-bits (composed of 64 packed 8-bit integers) from memory and return them. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for reading 64 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
+ ptr::read_unaligned(mem_addr as *const __m512i)
+}
+
+/// Load 256-bits (composed of 32 packed 8-bit integers) from memory and return them. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for reading 32 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
+ ptr::read_unaligned(mem_addr as *const __m256i)
+}
+
+/// Load 128-bits (composed of 16 packed 8-bit integers) from memory and return them. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for reading 16 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
+ ptr::read_unaligned(mem_addr as *const __m128i)
+}
+
+/// Store 512-bits (composed of 32 packed 16-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for writing 64 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
+ ptr::write_unaligned(mem_addr as *mut __m512i, a);
+}
+
+/// Store 256-bits (composed of 16 packed 16-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for writing 32 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
+ ptr::write_unaligned(mem_addr as *mut __m256i, a);
+}
+
+/// Store 128-bits (composed of 8 packed 16-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for writing 16 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
+ ptr::write_unaligned(mem_addr as *mut __m128i, a);
+}
+
+/// Store 512-bits (composed of 64 packed 8-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for writing 64 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
+ ptr::write_unaligned(mem_addr as *mut __m512i, a);
+}
+
+/// Store 256-bits (composed of 32 packed 8-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for writing 32 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
+ ptr::write_unaligned(mem_addr as *mut __m256i, a);
+}
+
+/// Store 128-bits (composed of 16 packed 8-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary, but it must be valid for writing 16 bytes.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
+ ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
/// Load packed 16-bit integers from memory into dst using writemask k
@@ -5505,7 +6329,10 @@ pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpavgw(a.as_u16x32(), b.as_u16x32()))
+ let a = simd_cast::<_, u32x32>(a.as_u16x32()); // widen to u32 lanes so a + b + 1 cannot overflow
+ let b = simd_cast::<_, u32x32>(b.as_u16x32());
+ let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1)); // (a + b + 1) >> 1
+ transmute(simd_cast::<_, u16x32>(r)) // narrow each lane back to u16
}
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -5591,7 +6418,10 @@ pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpavgb(a.as_u8x64(), b.as_u8x64()))
+ let a = simd_cast::<_, u16x64>(a.as_u8x64()); // widen to u16 lanes so a + b + 1 cannot overflow
+ let b = simd_cast::<_, u16x64>(b.as_u8x64());
+ let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1)); // (a + b + 1) >> 1
+ transmute(simd_cast::<_, u8x64>(r)) // narrow each lane back to u8
}
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -9221,6 +10051,26 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
transmute(simd_select_bitmask(k, one, zero))
}
+/// Convert 32-bit mask `a` into an integer value, and return the result.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask32_u32)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _cvtmask32_u32(a: __mmask32) -> u32 {
+ a
+}
+
+/// Convert integer value `a` into a 32-bit mask, and return the result.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _cvtu32_mask32(a: u32) -> __mmask32 {
+ a
+}
+
/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
@@ -9257,108 +10107,314 @@ pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
- a & b
+pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+ a & b
+}
+
+/// Compute the bitwise NOT of 32-bit mask `a`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
+ !a
+}
+
+/// Compute the bitwise NOT of 64-bit mask `a`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
+ !a
+}
+
+/// Compute the bitwise NOT of 32-bit mask `a` and then AND with 32-bit mask `b`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+ _knot_mask32(a) & b
+}
+
+/// Compute the bitwise NOT of 64-bit mask `a` and then AND with 64-bit mask `b`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+ _knot_mask64(a) & b
+}
+
+/// Compute the bitwise OR of 32-bit masks `a` and `b`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+ a | b
+}
+
+/// Compute the bitwise OR of 64-bit masks `a` and `b`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+ a | b
+}
+
+/// Compute the bitwise XOR of 32-bit masks `a` and `b`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+ a ^ b
+}
+
+/// Compute the bitwise XOR of 64-bit masks `a` and `b`, and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+ a ^ b
+}
+
+/// Compute the bitwise XNOR of 32-bit masks `a` and `b` (the NOT of their XOR), and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+ _knot_mask32(a ^ b)
+}
+
+/// Compute the bitwise XNOR of 64-bit masks `a` and `b` (the NOT of their XOR), and return the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+ _knot_mask64(a ^ b)
+}
+
+/// Compute the bitwise OR of 32-bit masks `a` and `b`. If the result is all zeros, return 1, otherwise
+/// return 0. If the result is all ones, store 1 in `*all_ones`, otherwise store 0 in `*all_ones`.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
+ let tmp = _kor_mask32(a, b);
+ *all_ones = (tmp == 0xffffffff) as u8;
+ (tmp == 0) as u8
+}
+
+/// Compute the bitwise OR of 64-bit masks `a` and `b`. If the result is all zeros, return 1, otherwise
+/// return 0. If the result is all ones, store 1 in `*all_ones`, otherwise store 0 in `*all_ones`.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
+ let tmp = _kor_mask64(a, b);
+ *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
+ (tmp == 0) as u8
+}
+
+/// Compute the bitwise OR of 32-bit masks `a` and `b`. If the result is all ones, return 1, otherwise
+/// return 0.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+ (_kor_mask32(a, b) == 0xffffffff) as u8
+}
+
+/// Compute the bitwise OR of 64-bit masks `a` and `b`. If the result is all ones, return 1, otherwise
+/// return 0.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+ (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
+}
+
+/// Compute the bitwise OR of 32-bit masks `a` and `b`. If the result is all zeros, return 1, otherwise
+/// return 0.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+ (_kor_mask32(a, b) == 0) as u8
+}
+
+/// Compute the bitwise OR of 64-bit masks `a` and `b`. If the result is all zeros, return 1, otherwise
+/// return 0.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+ (_kor_mask64(a, b) == 0) as u8
+}
+
+/// Shift the bits of 32-bit mask `a` left by `COUNT` while shifting in zeros, and return the least significant 32 bits of the result. A `COUNT` of 32 or more yields 0.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
+ a.checked_shl(COUNT).unwrap_or(0) // plain `<<` overflows once COUNT reaches the mask width
+}
+
+/// Shift the bits of 64-bit mask `a` left by `COUNT` while shifting in zeros, and return the least significant 64 bits of the result. A `COUNT` of 64 or more yields 0.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
+ a.checked_shl(COUNT).unwrap_or(0) // plain `<<` overflows once COUNT reaches the mask width
}
-/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
+/// Shift the bits of 32-bit mask `a` right by `COUNT` while shifting in zeros, and return the least significant 32 bits of the result. A `COUNT` of 32 or more yields 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
- a ^ 0b11111111_11111111_11111111_11111111
+pub unsafe fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
+ a.checked_shr(COUNT).unwrap_or(0) // plain `>>` overflows once COUNT reaches the mask width
}
-/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
+/// Shift the bits of 64-bit mask `a` right by `COUNT` while shifting in zeros, and return the least significant 64 bits of the result. A `COUNT` of 64 or more yields 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
- a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
+pub unsafe fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
+ a.checked_shr(COUNT).unwrap_or(0) // plain `>>` overflows once COUNT reaches the mask width
}
-/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
+/// Compute the bitwise AND of 32-bit masks `a` and `b`, and if the result is all zeros, return 1,
+/// otherwise return 0. Compute the bitwise NOT of `a` and then AND with `b`, if the result is all
+/// zeros, store 1 in `*and_not`, otherwise store 0 in `*and_not`.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
- _knot_mask32(a) & b
+pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
+ *and_not = (_kandn_mask32(a, b) == 0) as u8;
+ (_kand_mask32(a, b) == 0) as u8
}
-/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
+/// Compute the bitwise AND of 64-bit masks `a` and `b`, and if the result is all zeros, return 1,
+/// otherwise return 0. Compute the bitwise NOT of `a` and then AND with `b`, if the result is all
+/// zeros, store 1 in `*and_not`, otherwise store 0 in `*and_not`.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
- _knot_mask64(a) & b
+pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
+ *and_not = (_kandn_mask64(a, b) == 0) as u8;
+ (_kand_mask64(a, b) == 0) as u8
}
-/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
+/// Compute the bitwise NOT of 32-bit mask `a` and then AND with 32-bit mask `b`, if the result is all
+/// zeros, return 1, otherwise return 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
- a | b
+pub unsafe fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+ (_kandn_mask32(a, b) == 0) as u8
}
-/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
+/// Compute the bitwise NOT of 64-bit mask `a` and then AND with 64-bit mask `b`, if the result is all
+/// zeros, return 1, otherwise return 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
- a | b
+pub unsafe fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+ (_kandn_mask64(a, b) == 0) as u8
}
-/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
+/// Compute the bitwise AND of 32-bit masks `a` and `b`, if the result is all zeros, return 1, otherwise
+/// return 0.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
- a ^ b
+pub unsafe fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+ (_kand_mask32(a, b) == 0) as u8
}
-/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
+/// Compute the bitwise AND of 64-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
+/// store 0 in dst.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
- a ^ b
+pub unsafe fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+ (_kand_mask64(a, b) == 0) as u8
}
-/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
+/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kunpackw)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
- _knot_mask32(a ^ b)
+#[cfg_attr(test, assert_instr(mov))] // compiles to ordinary AND/shift code instead of kunpckwd
+pub unsafe fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
+ ((a & 0xffff) << 16) | (b & 0xffff)
}
-/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
+/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
- _knot_mask64(a ^ b)
+#[cfg_attr(test, assert_instr(mov))] // compiles to ordinary AND/shift code instead of kunpckdq
+pub unsafe fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
+ ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
@@ -10589,115 +11645,9 @@ pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a:
#[allow(improper_ctypes)]
extern "C" {
- #[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
- fn vpaddusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
- #[link_name = "llvm.x86.avx512.mask.paddus.w.256"]
- fn vpaddusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
- #[link_name = "llvm.x86.avx512.mask.paddus.w.128"]
- fn vpaddusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;
-
- #[link_name = "llvm.x86.avx512.mask.paddus.b.512"]
- fn vpaddusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
- #[link_name = "llvm.x86.avx512.mask.paddus.b.256"]
- fn vpaddusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
- #[link_name = "llvm.x86.avx512.mask.paddus.b.128"]
- fn vpaddusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;
-
- #[link_name = "llvm.x86.avx512.mask.padds.w.512"]
- fn vpaddsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
- #[link_name = "llvm.x86.avx512.mask.padds.w.256"]
- fn vpaddsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
- #[link_name = "llvm.x86.avx512.mask.padds.w.128"]
- fn vpaddsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;
-
- #[link_name = "llvm.x86.avx512.mask.padds.b.512"]
- fn vpaddsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
- #[link_name = "llvm.x86.avx512.mask.padds.b.256"]
- fn vpaddsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
- #[link_name = "llvm.x86.avx512.mask.padds.b.128"]
- fn vpaddsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;
-
- #[link_name = "llvm.x86.avx512.mask.psubus.w.512"]
- fn vpsubusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
- #[link_name = "llvm.x86.avx512.mask.psubus.w.256"]
- fn vpsubusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
- #[link_name = "llvm.x86.avx512.mask.psubus.w.128"]
- fn vpsubusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;
-
- #[link_name = "llvm.x86.avx512.mask.psubus.b.512"]
- fn vpsubusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
- #[link_name = "llvm.x86.avx512.mask.psubus.b.256"]
- fn vpsubusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
- #[link_name = "llvm.x86.avx512.mask.psubus.b.128"]
- fn vpsubusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;
-
- #[link_name = "llvm.x86.avx512.mask.psubs.w.512"]
- fn vpsubsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
- #[link_name = "llvm.x86.avx512.mask.psubs.w.256"]
- fn vpsubsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
- #[link_name = "llvm.x86.avx512.mask.psubs.w.128"]
- fn vpsubsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;
-
- #[link_name = "llvm.x86.avx512.mask.psubs.b.512"]
- fn vpsubsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
- #[link_name = "llvm.x86.avx512.mask.psubs.b.256"]
- fn vpsubsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
- #[link_name = "llvm.x86.avx512.mask.psubs.b.128"]
- fn vpsubsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;
-
- #[link_name = "llvm.x86.avx512.pmulhu.w.512"]
- fn vpmulhuw(a: u16x32, b: u16x32) -> u16x32;
- #[link_name = "llvm.x86.avx512.pmulh.w.512"]
- fn vpmulhw(a: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
- #[link_name = "llvm.x86.avx512.mask.ucmp.w.512"]
- fn vpcmpuw(a: u16x32, b: u16x32, op: i32, mask: u32) -> u32;
- #[link_name = "llvm.x86.avx512.mask.ucmp.w.256"]
- fn vpcmpuw256(a: u16x16, b: u16x16, op: i32, mask: u16) -> u16;
- #[link_name = "llvm.x86.avx512.mask.ucmp.w.128"]
- fn vpcmpuw128(a: u16x8, b: u16x8, op: i32, mask: u8) -> u8;
-
- #[link_name = "llvm.x86.avx512.mask.ucmp.b.512"]
- fn vpcmpub(a: u8x64, b: u8x64, op: i32, mask: u64) -> u64;
- #[link_name = "llvm.x86.avx512.mask.ucmp.b.256"]
- fn vpcmpub256(a: u8x32, b: u8x32, op: i32, mask: u32) -> u32;
- #[link_name = "llvm.x86.avx512.mask.ucmp.b.128"]
- fn vpcmpub128(a: u8x16, b: u8x16, op: i32, mask: u16) -> u16;
-
- #[link_name = "llvm.x86.avx512.mask.cmp.w.512"]
- fn vpcmpw(a: i16x32, b: i16x32, op: i32, mask: u32) -> u32;
- #[link_name = "llvm.x86.avx512.mask.cmp.w.256"]
- fn vpcmpw256(a: i16x16, b: i16x16, op: i32, mask: u16) -> u16;
- #[link_name = "llvm.x86.avx512.mask.cmp.w.128"]
- fn vpcmpw128(a: i16x8, b: i16x8, op: i32, mask: u8) -> u8;
-
- #[link_name = "llvm.x86.avx512.mask.cmp.b.512"]
- fn vpcmpb(a: i8x64, b: i8x64, op: i32, mask: u64) -> u64;
- #[link_name = "llvm.x86.avx512.mask.cmp.b.256"]
- fn vpcmpb256(a: i8x32, b: i8x32, op: i32, mask: u32) -> u32;
- #[link_name = "llvm.x86.avx512.mask.cmp.b.128"]
- fn vpcmpb128(a: i8x16, b: i8x16, op: i32, mask: u16) -> u16;
-
- #[link_name = "llvm.x86.avx512.mask.pmaxu.w.512"]
- fn vpmaxuw(a: u16x32, b: u16x32) -> u16x32;
- #[link_name = "llvm.x86.avx512.mask.pmaxu.b.512"]
- fn vpmaxub(a: u8x64, b: u8x64) -> u8x64;
- #[link_name = "llvm.x86.avx512.mask.pmaxs.w.512"]
- fn vpmaxsw(a: i16x32, b: i16x32) -> i16x32;
- #[link_name = "llvm.x86.avx512.mask.pmaxs.b.512"]
- fn vpmaxsb(a: i8x64, b: i8x64) -> i8x64;
-
- #[link_name = "llvm.x86.avx512.mask.pminu.w.512"]
- fn vpminuw(a: u16x32, b: u16x32) -> u16x32;
- #[link_name = "llvm.x86.avx512.mask.pminu.b.512"]
- fn vpminub(a: u8x64, b: u8x64) -> u8x64;
- #[link_name = "llvm.x86.avx512.mask.pmins.w.512"]
- fn vpminsw(a: i16x32, b: i16x32) -> i16x32;
- #[link_name = "llvm.x86.avx512.mask.pmins.b.512"]
- fn vpminsb(a: i8x64, b: i8x64) -> i8x64;
-
#[link_name = "llvm.x86.avx512.pmaddw.d.512"]
fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
#[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
@@ -10712,11 +11662,6 @@ extern "C" {
#[link_name = "llvm.x86.avx512.packuswb.512"]
fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;
- #[link_name = "llvm.x86.avx512.pavg.w.512"]
- fn vpavgw(a: u16x32, b: u16x32) -> u16x32;
- #[link_name = "llvm.x86.avx512.pavg.b.512"]
- fn vpavgb(a: u8x64, b: u8x64) -> u8x64;
-
#[link_name = "llvm.x86.avx512.psll.w.512"]
fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
@@ -13754,11 +14699,227 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmple_epu8_mask() {
- let a = _mm512_set1_epi8(-1);
- let b = _mm512_set1_epi8(-1);
+ unsafe fn test_mm512_mask_cmple_epu8_mask() {
+ let a = _mm512_set1_epi8(-1);
+ let b = _mm512_set1_epi8(-1);
+ let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
+ let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
+ assert_eq!(
+ r,
+ 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
+ );
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_cmple_epu8_mask() {
+ let a = _mm256_set1_epi8(-1);
+ let b = _mm256_set1_epi8(-1);
+ let m = _mm256_cmple_epu8_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_cmple_epu8_mask() {
+ let a = _mm256_set1_epi8(-1);
+ let b = _mm256_set1_epi8(-1);
+ let mask = 0b01010101_01010101_01010101_01010101;
+ let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_cmple_epu8_mask() {
+ let a = _mm_set1_epi8(-1);
+ let b = _mm_set1_epi8(-1);
+ let m = _mm_cmple_epu8_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_cmple_epu8_mask() {
+ let a = _mm_set1_epi8(-1);
+ let b = _mm_set1_epi8(-1);
+ let mask = 0b01010101_01010101;
+ let r = _mm_mask_cmple_epu8_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmple_epi16_mask() {
+ let a = _mm512_set1_epi16(-1);
+ let b = _mm512_set1_epi16(-1);
+ let m = _mm512_cmple_epi16_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmple_epi16_mask() {
+ let a = _mm512_set1_epi16(-1);
+ let b = _mm512_set1_epi16(-1);
+ let mask = 0b01010101_01010101_01010101_01010101;
+ let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_cmple_epi16_mask() {
+ let a = _mm256_set1_epi16(-1);
+ let b = _mm256_set1_epi16(-1);
+ let m = _mm256_cmple_epi16_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_cmple_epi16_mask() {
+ let a = _mm256_set1_epi16(-1);
+ let b = _mm256_set1_epi16(-1);
+ let mask = 0b01010101_01010101;
+ let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_cmple_epi16_mask() {
+ let a = _mm_set1_epi16(-1);
+ let b = _mm_set1_epi16(-1);
+ let m = _mm_cmple_epi16_mask(a, b);
+ assert_eq!(m, 0b11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_cmple_epi16_mask() {
+ let a = _mm_set1_epi16(-1);
+ let b = _mm_set1_epi16(-1);
+ let mask = 0b01010101;
+ let r = _mm_mask_cmple_epi16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmple_epi8_mask() {
+ let a = _mm512_set1_epi8(-1);
+ let b = _mm512_set1_epi8(-1);
+ let m = _mm512_cmple_epi8_mask(a, b);
+ assert_eq!(
+ m,
+ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
+ );
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmple_epi8_mask() {
+ let a = _mm512_set1_epi8(-1);
+ let b = _mm512_set1_epi8(-1);
+ let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
+ let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
+ assert_eq!(
+ r,
+ 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
+ );
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_cmple_epi8_mask() {
+ let a = _mm256_set1_epi8(-1);
+ let b = _mm256_set1_epi8(-1);
+ let m = _mm256_cmple_epi8_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_cmple_epi8_mask() {
+ let a = _mm256_set1_epi8(-1);
+ let b = _mm256_set1_epi8(-1);
+ let mask = 0b01010101_01010101_01010101_01010101;
+ let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_cmple_epi8_mask() {
+ let a = _mm_set1_epi8(-1);
+ let b = _mm_set1_epi8(-1);
+ let m = _mm_cmple_epi8_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_cmple_epi8_mask() {
+ let a = _mm_set1_epi8(-1);
+ let b = _mm_set1_epi8(-1);
+ let mask = 0b01010101_01010101;
+ let r = _mm_mask_cmple_epi8_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpge_epu16_mask() {
+ let a = _mm512_set1_epi16(1);
+ let b = _mm512_set1_epi16(1);
+ let m = _mm512_cmpge_epu16_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpge_epu16_mask() {
+ let a = _mm512_set1_epi16(1);
+ let b = _mm512_set1_epi16(1);
+ let mask = 0b01010101_01010101_01010101_01010101;
+ let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_cmpge_epu16_mask() {
+ let a = _mm256_set1_epi16(1);
+ let b = _mm256_set1_epi16(1);
+ let m = _mm256_cmpge_epu16_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_cmpge_epu16_mask() {
+ let a = _mm256_set1_epi16(1);
+ let b = _mm256_set1_epi16(1);
+ let mask = 0b01010101_01010101;
+ let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_cmpge_epu16_mask() {
+ let a = _mm_set1_epi16(1);
+ let b = _mm_set1_epi16(1);
+ let m = _mm_cmpge_epu16_mask(a, b);
+ assert_eq!(m, 0b11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_cmpge_epu16_mask() {
+ let a = _mm_set1_epi16(1);
+ let b = _mm_set1_epi16(1);
+ let mask = 0b01010101;
+ let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpge_epu8_mask() {
+ let a = _mm512_set1_epi8(1);
+ let b = _mm512_set1_epi8(1);
+ let m = _mm512_cmpge_epu8_mask(a, b);
+ assert_eq!(
+ m,
+ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
+ );
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpge_epu8_mask() {
+ let a = _mm512_set1_epi8(1);
+ let b = _mm512_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
+ let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
assert_eq!(
r,
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
@@ -13766,95 +14927,95 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmple_epu8_mask() {
- let a = _mm256_set1_epi8(-1);
- let b = _mm256_set1_epi8(-1);
- let m = _mm256_cmple_epu8_mask(a, b);
+ unsafe fn test_mm256_cmpge_epu8_mask() {
+ let a = _mm256_set1_epi8(1);
+ let b = _mm256_set1_epi8(1);
+ let m = _mm256_cmpge_epu8_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmple_epu8_mask() {
- let a = _mm256_set1_epi8(-1);
- let b = _mm256_set1_epi8(-1);
+ unsafe fn test_mm256_mask_cmpge_epu8_mask() {
+ let a = _mm256_set1_epi8(1);
+ let b = _mm256_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
+ let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmple_epu8_mask() {
- let a = _mm_set1_epi8(-1);
- let b = _mm_set1_epi8(-1);
- let m = _mm_cmple_epu8_mask(a, b);
+ unsafe fn test_mm_cmpge_epu8_mask() {
+ let a = _mm_set1_epi8(1);
+ let b = _mm_set1_epi8(1);
+ let m = _mm_cmpge_epu8_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmple_epu8_mask() {
- let a = _mm_set1_epi8(-1);
- let b = _mm_set1_epi8(-1);
+ unsafe fn test_mm_mask_cmpge_epu8_mask() {
+ let a = _mm_set1_epi8(1);
+ let b = _mm_set1_epi8(1);
let mask = 0b01010101_01010101;
- let r = _mm_mask_cmple_epu8_mask(mask, a, b);
+ let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmple_epi16_mask() {
+ unsafe fn test_mm512_cmpge_epi16_mask() {
let a = _mm512_set1_epi16(-1);
let b = _mm512_set1_epi16(-1);
- let m = _mm512_cmple_epi16_mask(a, b);
+ let m = _mm512_cmpge_epi16_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmple_epi16_mask() {
+ unsafe fn test_mm512_mask_cmpge_epi16_mask() {
let a = _mm512_set1_epi16(-1);
let b = _mm512_set1_epi16(-1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
+ let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmple_epi16_mask() {
+ unsafe fn test_mm256_cmpge_epi16_mask() {
let a = _mm256_set1_epi16(-1);
let b = _mm256_set1_epi16(-1);
- let m = _mm256_cmple_epi16_mask(a, b);
+ let m = _mm256_cmpge_epi16_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmple_epi16_mask() {
+ unsafe fn test_mm256_mask_cmpge_epi16_mask() {
let a = _mm256_set1_epi16(-1);
let b = _mm256_set1_epi16(-1);
let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
+ let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmple_epi16_mask() {
+ unsafe fn test_mm_cmpge_epi16_mask() {
let a = _mm_set1_epi16(-1);
let b = _mm_set1_epi16(-1);
- let m = _mm_cmple_epi16_mask(a, b);
+ let m = _mm_cmpge_epi16_mask(a, b);
assert_eq!(m, 0b11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmple_epi16_mask() {
+ unsafe fn test_mm_mask_cmpge_epi16_mask() {
let a = _mm_set1_epi16(-1);
let b = _mm_set1_epi16(-1);
let mask = 0b01010101;
- let r = _mm_mask_cmple_epi16_mask(mask, a, b);
+ let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
assert_eq!(r, 0b01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmple_epi8_mask() {
+ unsafe fn test_mm512_cmpge_epi8_mask() {
let a = _mm512_set1_epi8(-1);
let b = _mm512_set1_epi8(-1);
- let m = _mm512_cmple_epi8_mask(a, b);
+ let m = _mm512_cmpge_epi8_mask(a, b);
assert_eq!(
m,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
@@ -13862,11 +15023,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmple_epi8_mask() {
+ unsafe fn test_mm512_mask_cmpge_epi8_mask() {
let a = _mm512_set1_epi8(-1);
let b = _mm512_set1_epi8(-1);
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
+ let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
assert_eq!(
r,
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
@@ -13874,95 +15035,95 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmple_epi8_mask() {
+ unsafe fn test_mm256_cmpge_epi8_mask() {
let a = _mm256_set1_epi8(-1);
let b = _mm256_set1_epi8(-1);
- let m = _mm256_cmple_epi8_mask(a, b);
+ let m = _mm256_cmpge_epi8_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmple_epi8_mask() {
+ unsafe fn test_mm256_mask_cmpge_epi8_mask() {
let a = _mm256_set1_epi8(-1);
let b = _mm256_set1_epi8(-1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
+ let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmple_epi8_mask() {
+ unsafe fn test_mm_cmpge_epi8_mask() {
let a = _mm_set1_epi8(-1);
let b = _mm_set1_epi8(-1);
- let m = _mm_cmple_epi8_mask(a, b);
+ let m = _mm_cmpge_epi8_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmple_epi8_mask() {
+ unsafe fn test_mm_mask_cmpge_epi8_mask() {
let a = _mm_set1_epi8(-1);
let b = _mm_set1_epi8(-1);
let mask = 0b01010101_01010101;
- let r = _mm_mask_cmple_epi8_mask(mask, a, b);
+ let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpge_epu16_mask() {
+ unsafe fn test_mm512_cmpeq_epu16_mask() {
let a = _mm512_set1_epi16(1);
let b = _mm512_set1_epi16(1);
- let m = _mm512_cmpge_epu16_mask(a, b);
+ let m = _mm512_cmpeq_epu16_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpge_epu16_mask() {
+ unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
let a = _mm512_set1_epi16(1);
let b = _mm512_set1_epi16(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
+ let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpge_epu16_mask() {
+ unsafe fn test_mm256_cmpeq_epu16_mask() {
let a = _mm256_set1_epi16(1);
let b = _mm256_set1_epi16(1);
- let m = _mm256_cmpge_epu16_mask(a, b);
+ let m = _mm256_cmpeq_epu16_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpge_epu16_mask() {
+ unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
let a = _mm256_set1_epi16(1);
let b = _mm256_set1_epi16(1);
let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
+ let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpge_epu16_mask() {
+ unsafe fn test_mm_cmpeq_epu16_mask() {
let a = _mm_set1_epi16(1);
let b = _mm_set1_epi16(1);
- let m = _mm_cmpge_epu16_mask(a, b);
+ let m = _mm_cmpeq_epu16_mask(a, b);
assert_eq!(m, 0b11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpge_epu16_mask() {
+ unsafe fn test_mm_mask_cmpeq_epu16_mask() {
let a = _mm_set1_epi16(1);
let b = _mm_set1_epi16(1);
let mask = 0b01010101;
- let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
+ let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
assert_eq!(r, 0b01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpge_epu8_mask() {
+ unsafe fn test_mm512_cmpeq_epu8_mask() {
let a = _mm512_set1_epi8(1);
let b = _mm512_set1_epi8(1);
- let m = _mm512_cmpge_epu8_mask(a, b);
+ let m = _mm512_cmpeq_epu8_mask(a, b);
assert_eq!(
m,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
@@ -13970,11 +15131,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpge_epu8_mask() {
+ unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
let a = _mm512_set1_epi8(1);
let b = _mm512_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
+ let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
assert_eq!(
r,
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
@@ -13982,95 +15143,203 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpge_epu8_mask() {
+ unsafe fn test_mm256_cmpeq_epu8_mask() {
let a = _mm256_set1_epi8(1);
let b = _mm256_set1_epi8(1);
- let m = _mm256_cmpge_epu8_mask(a, b);
+ let m = _mm256_cmpeq_epu8_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpge_epu8_mask() {
+ unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
let a = _mm256_set1_epi8(1);
let b = _mm256_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
+ let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpge_epu8_mask() {
+ unsafe fn test_mm_cmpeq_epu8_mask() {
let a = _mm_set1_epi8(1);
let b = _mm_set1_epi8(1);
- let m = _mm_cmpge_epu8_mask(a, b);
+ let m = _mm_cmpeq_epu8_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpge_epu8_mask() {
+ unsafe fn test_mm_mask_cmpeq_epu8_mask() {
let a = _mm_set1_epi8(1);
let b = _mm_set1_epi8(1);
let mask = 0b01010101_01010101;
- let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
+ let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpge_epi16_mask() {
+ unsafe fn test_mm512_cmpeq_epi16_mask() {
let a = _mm512_set1_epi16(-1);
let b = _mm512_set1_epi16(-1);
- let m = _mm512_cmpge_epi16_mask(a, b);
+ let m = _mm512_cmpeq_epi16_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpge_epi16_mask() {
+ unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
let a = _mm512_set1_epi16(-1);
let b = _mm512_set1_epi16(-1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
+ let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpge_epi16_mask() {
+ unsafe fn test_mm256_cmpeq_epi16_mask() {
let a = _mm256_set1_epi16(-1);
let b = _mm256_set1_epi16(-1);
- let m = _mm256_cmpge_epi16_mask(a, b);
+ let m = _mm256_cmpeq_epi16_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
+ let a = _mm256_set1_epi16(-1);
+ let b = _mm256_set1_epi16(-1);
+ let mask = 0b01010101_01010101;
+ let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_cmpeq_epi16_mask() {
+ let a = _mm_set1_epi16(-1);
+ let b = _mm_set1_epi16(-1);
+ let m = _mm_cmpeq_epi16_mask(a, b);
+ assert_eq!(m, 0b11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_cmpeq_epi16_mask() {
+ let a = _mm_set1_epi16(-1);
+ let b = _mm_set1_epi16(-1);
+ let mask = 0b01010101;
+ let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpeq_epi8_mask() {
+ let a = _mm512_set1_epi8(-1);
+ let b = _mm512_set1_epi8(-1);
+ let m = _mm512_cmpeq_epi8_mask(a, b);
+ assert_eq!(
+ m,
+ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
+ );
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
+ let a = _mm512_set1_epi8(-1);
+ let b = _mm512_set1_epi8(-1);
+ let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
+ let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
+ assert_eq!(
+ r,
+ 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
+ );
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_cmpeq_epi8_mask() {
+ let a = _mm256_set1_epi8(-1);
+ let b = _mm256_set1_epi8(-1);
+ let m = _mm256_cmpeq_epi8_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
+ let a = _mm256_set1_epi8(-1);
+ let b = _mm256_set1_epi8(-1);
+ let mask = 0b01010101_01010101_01010101_01010101;
+ let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_cmpeq_epi8_mask() {
+ let a = _mm_set1_epi8(-1);
+ let b = _mm_set1_epi8(-1);
+ let m = _mm_cmpeq_epi8_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_cmpeq_epi8_mask() {
+ let a = _mm_set1_epi8(-1);
+ let b = _mm_set1_epi8(-1);
+ let mask = 0b01010101_01010101;
+ let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpneq_epu16_mask() {
+ let a = _mm512_set1_epi16(2);
+ let b = _mm512_set1_epi16(1);
+ let m = _mm512_cmpneq_epu16_mask(a, b);
+ assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
+ let a = _mm512_set1_epi16(2);
+ let b = _mm512_set1_epi16(1);
+ let mask = 0b01010101_01010101_01010101_01010101;
+ let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
+ assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_cmpneq_epu16_mask() {
+ let a = _mm256_set1_epi16(2);
+ let b = _mm256_set1_epi16(1);
+ let m = _mm256_cmpneq_epu16_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpge_epi16_mask() {
- let a = _mm256_set1_epi16(-1);
- let b = _mm256_set1_epi16(-1);
+ unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
+ let a = _mm256_set1_epi16(2);
+ let b = _mm256_set1_epi16(1);
let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
+ let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpge_epi16_mask() {
- let a = _mm_set1_epi16(-1);
- let b = _mm_set1_epi16(-1);
- let m = _mm_cmpge_epi16_mask(a, b);
+ unsafe fn test_mm_cmpneq_epu16_mask() {
+ let a = _mm_set1_epi16(2);
+ let b = _mm_set1_epi16(1);
+ let m = _mm_cmpneq_epu16_mask(a, b);
assert_eq!(m, 0b11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpge_epi16_mask() {
- let a = _mm_set1_epi16(-1);
- let b = _mm_set1_epi16(-1);
+ unsafe fn test_mm_mask_cmpneq_epu16_mask() {
+ let a = _mm_set1_epi16(2);
+ let b = _mm_set1_epi16(1);
let mask = 0b01010101;
- let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
+ let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
assert_eq!(r, 0b01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpge_epi8_mask() {
- let a = _mm512_set1_epi8(-1);
- let b = _mm512_set1_epi8(-1);
- let m = _mm512_cmpge_epi8_mask(a, b);
+ unsafe fn test_mm512_cmpneq_epu8_mask() {
+ let a = _mm512_set1_epi8(2);
+ let b = _mm512_set1_epi8(1);
+ let m = _mm512_cmpneq_epu8_mask(a, b);
assert_eq!(
m,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
@@ -14078,11 +15347,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpge_epi8_mask() {
- let a = _mm512_set1_epi8(-1);
- let b = _mm512_set1_epi8(-1);
+ unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
+ let a = _mm512_set1_epi8(2);
+ let b = _mm512_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
+ let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
assert_eq!(
r,
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
@@ -14090,95 +15359,95 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpge_epi8_mask() {
- let a = _mm256_set1_epi8(-1);
- let b = _mm256_set1_epi8(-1);
- let m = _mm256_cmpge_epi8_mask(a, b);
+ unsafe fn test_mm256_cmpneq_epu8_mask() {
+ let a = _mm256_set1_epi8(2);
+ let b = _mm256_set1_epi8(1);
+ let m = _mm256_cmpneq_epu8_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpge_epi8_mask() {
- let a = _mm256_set1_epi8(-1);
- let b = _mm256_set1_epi8(-1);
+ unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
+ let a = _mm256_set1_epi8(2);
+ let b = _mm256_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
+ let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpge_epi8_mask() {
- let a = _mm_set1_epi8(-1);
- let b = _mm_set1_epi8(-1);
- let m = _mm_cmpge_epi8_mask(a, b);
+ unsafe fn test_mm_cmpneq_epu8_mask() {
+ let a = _mm_set1_epi8(2);
+ let b = _mm_set1_epi8(1);
+ let m = _mm_cmpneq_epu8_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpge_epi8_mask() {
- let a = _mm_set1_epi8(-1);
- let b = _mm_set1_epi8(-1);
+ unsafe fn test_mm_mask_cmpneq_epu8_mask() {
+ let a = _mm_set1_epi8(2);
+ let b = _mm_set1_epi8(1);
let mask = 0b01010101_01010101;
- let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
+ let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpeq_epu16_mask() {
+ unsafe fn test_mm512_cmpneq_epi16_mask() {
let a = _mm512_set1_epi16(1);
- let b = _mm512_set1_epi16(1);
- let m = _mm512_cmpeq_epu16_mask(a, b);
+ let b = _mm512_set1_epi16(-1);
+ let m = _mm512_cmpneq_epi16_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
+ unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
let a = _mm512_set1_epi16(1);
- let b = _mm512_set1_epi16(1);
+ let b = _mm512_set1_epi16(-1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
+ let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpeq_epu16_mask() {
+ unsafe fn test_mm256_cmpneq_epi16_mask() {
let a = _mm256_set1_epi16(1);
- let b = _mm256_set1_epi16(1);
- let m = _mm256_cmpeq_epu16_mask(a, b);
+ let b = _mm256_set1_epi16(-1);
+ let m = _mm256_cmpneq_epi16_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
+ unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
let a = _mm256_set1_epi16(1);
- let b = _mm256_set1_epi16(1);
+ let b = _mm256_set1_epi16(-1);
let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
+ let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpeq_epu16_mask() {
+ unsafe fn test_mm_cmpneq_epi16_mask() {
let a = _mm_set1_epi16(1);
- let b = _mm_set1_epi16(1);
- let m = _mm_cmpeq_epu16_mask(a, b);
+ let b = _mm_set1_epi16(-1);
+ let m = _mm_cmpneq_epi16_mask(a, b);
assert_eq!(m, 0b11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpeq_epu16_mask() {
+ unsafe fn test_mm_mask_cmpneq_epi16_mask() {
let a = _mm_set1_epi16(1);
- let b = _mm_set1_epi16(1);
+ let b = _mm_set1_epi16(-1);
let mask = 0b01010101;
- let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
+ let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
assert_eq!(r, 0b01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpeq_epu8_mask() {
+ unsafe fn test_mm512_cmpneq_epi8_mask() {
let a = _mm512_set1_epi8(1);
- let b = _mm512_set1_epi8(1);
- let m = _mm512_cmpeq_epu8_mask(a, b);
+ let b = _mm512_set1_epi8(-1);
+ let m = _mm512_cmpneq_epi8_mask(a, b);
assert_eq!(
m,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
@@ -14186,11 +15455,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
+ unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
let a = _mm512_set1_epi8(1);
- let b = _mm512_set1_epi8(1);
+ let b = _mm512_set1_epi8(-1);
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
+ let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
assert_eq!(
r,
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
@@ -14198,95 +15467,95 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpeq_epu8_mask() {
+ unsafe fn test_mm256_cmpneq_epi8_mask() {
let a = _mm256_set1_epi8(1);
- let b = _mm256_set1_epi8(1);
- let m = _mm256_cmpeq_epu8_mask(a, b);
+ let b = _mm256_set1_epi8(-1);
+ let m = _mm256_cmpneq_epi8_mask(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
+ unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
let a = _mm256_set1_epi8(1);
- let b = _mm256_set1_epi8(1);
+ let b = _mm256_set1_epi8(-1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
+ let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpeq_epu8_mask() {
+ unsafe fn test_mm_cmpneq_epi8_mask() {
let a = _mm_set1_epi8(1);
- let b = _mm_set1_epi8(1);
- let m = _mm_cmpeq_epu8_mask(a, b);
+ let b = _mm_set1_epi8(-1);
+ let m = _mm_cmpneq_epi8_mask(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpeq_epu8_mask() {
+ unsafe fn test_mm_mask_cmpneq_epi8_mask() {
let a = _mm_set1_epi8(1);
- let b = _mm_set1_epi8(1);
+ let b = _mm_set1_epi8(-1);
let mask = 0b01010101_01010101;
- let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
+ let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpeq_epi16_mask() {
- let a = _mm512_set1_epi16(-1);
- let b = _mm512_set1_epi16(-1);
- let m = _mm512_cmpeq_epi16_mask(a, b);
+ unsafe fn test_mm512_cmp_epu16_mask() {
+ let a = _mm512_set1_epi16(0);
+ let b = _mm512_set1_epi16(1);
+ let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
- let a = _mm512_set1_epi16(-1);
- let b = _mm512_set1_epi16(-1);
+ unsafe fn test_mm512_mask_cmp_epu16_mask() {
+ let a = _mm512_set1_epi16(0);
+ let b = _mm512_set1_epi16(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
+ let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpeq_epi16_mask() {
- let a = _mm256_set1_epi16(-1);
- let b = _mm256_set1_epi16(-1);
- let m = _mm256_cmpeq_epi16_mask(a, b);
+ unsafe fn test_mm256_cmp_epu16_mask() {
+ let a = _mm256_set1_epi16(0);
+ let b = _mm256_set1_epi16(1);
+ let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
- let a = _mm256_set1_epi16(-1);
- let b = _mm256_set1_epi16(-1);
+ unsafe fn test_mm256_mask_cmp_epu16_mask() {
+ let a = _mm256_set1_epi16(0);
+ let b = _mm256_set1_epi16(1);
let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
+ let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpeq_epi16_mask() {
- let a = _mm_set1_epi16(-1);
- let b = _mm_set1_epi16(-1);
- let m = _mm_cmpeq_epi16_mask(a, b);
+ unsafe fn test_mm_cmp_epu16_mask() {
+ let a = _mm_set1_epi16(0);
+ let b = _mm_set1_epi16(1);
+ let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpeq_epi16_mask() {
- let a = _mm_set1_epi16(-1);
- let b = _mm_set1_epi16(-1);
+ unsafe fn test_mm_mask_cmp_epu16_mask() {
+ let a = _mm_set1_epi16(0);
+ let b = _mm_set1_epi16(1);
let mask = 0b01010101;
- let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
+ let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpeq_epi8_mask() {
- let a = _mm512_set1_epi8(-1);
- let b = _mm512_set1_epi8(-1);
- let m = _mm512_cmpeq_epi8_mask(a, b);
+ unsafe fn test_mm512_cmp_epu8_mask() {
+ let a = _mm512_set1_epi8(0);
+ let b = _mm512_set1_epi8(1);
+ let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(
m,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
@@ -14294,11 +15563,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
- let a = _mm512_set1_epi8(-1);
- let b = _mm512_set1_epi8(-1);
+ unsafe fn test_mm512_mask_cmp_epu8_mask() {
+ let a = _mm512_set1_epi8(0);
+ let b = _mm512_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
+ let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(
r,
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
@@ -14306,95 +15575,95 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpeq_epi8_mask() {
- let a = _mm256_set1_epi8(-1);
- let b = _mm256_set1_epi8(-1);
- let m = _mm256_cmpeq_epi8_mask(a, b);
+ unsafe fn test_mm256_cmp_epu8_mask() {
+ let a = _mm256_set1_epi8(0);
+ let b = _mm256_set1_epi8(1);
+ let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
- let a = _mm256_set1_epi8(-1);
- let b = _mm256_set1_epi8(-1);
+ unsafe fn test_mm256_mask_cmp_epu8_mask() {
+ let a = _mm256_set1_epi8(0);
+ let b = _mm256_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
+ let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpeq_epi8_mask() {
- let a = _mm_set1_epi8(-1);
- let b = _mm_set1_epi8(-1);
- let m = _mm_cmpeq_epi8_mask(a, b);
+ unsafe fn test_mm_cmp_epu8_mask() {
+ let a = _mm_set1_epi8(0);
+ let b = _mm_set1_epi8(1);
+ let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpeq_epi8_mask() {
- let a = _mm_set1_epi8(-1);
- let b = _mm_set1_epi8(-1);
+ unsafe fn test_mm_mask_cmp_epu8_mask() {
+ let a = _mm_set1_epi8(0);
+ let b = _mm_set1_epi8(1);
let mask = 0b01010101_01010101;
- let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
+ let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpneq_epu16_mask() {
- let a = _mm512_set1_epi16(2);
+ unsafe fn test_mm512_cmp_epi16_mask() {
+ let a = _mm512_set1_epi16(0);
let b = _mm512_set1_epi16(1);
- let m = _mm512_cmpneq_epu16_mask(a, b);
+ let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
- let a = _mm512_set1_epi16(2);
+ unsafe fn test_mm512_mask_cmp_epi16_mask() {
+ let a = _mm512_set1_epi16(0);
let b = _mm512_set1_epi16(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
+ let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpneq_epu16_mask() {
- let a = _mm256_set1_epi16(2);
+ unsafe fn test_mm256_cmp_epi16_mask() {
+ let a = _mm256_set1_epi16(0);
let b = _mm256_set1_epi16(1);
- let m = _mm256_cmpneq_epu16_mask(a, b);
+ let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
- let a = _mm256_set1_epi16(2);
+ unsafe fn test_mm256_mask_cmp_epi16_mask() {
+ let a = _mm256_set1_epi16(0);
let b = _mm256_set1_epi16(1);
let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
+ let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpneq_epu16_mask() {
- let a = _mm_set1_epi16(2);
+ unsafe fn test_mm_cmp_epi16_mask() {
+ let a = _mm_set1_epi16(0);
let b = _mm_set1_epi16(1);
- let m = _mm_cmpneq_epu16_mask(a, b);
+ let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpneq_epu16_mask() {
- let a = _mm_set1_epi16(2);
+ unsafe fn test_mm_mask_cmp_epi16_mask() {
+ let a = _mm_set1_epi16(0);
let b = _mm_set1_epi16(1);
let mask = 0b01010101;
- let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
+ let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101);
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpneq_epu8_mask() {
- let a = _mm512_set1_epi8(2);
+ unsafe fn test_mm512_cmp_epi8_mask() {
+ let a = _mm512_set1_epi8(0);
let b = _mm512_set1_epi8(1);
- let m = _mm512_cmpneq_epu8_mask(a, b);
+ let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(
m,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
@@ -14402,11 +15671,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
- let a = _mm512_set1_epi8(2);
+ unsafe fn test_mm512_mask_cmp_epi8_mask() {
+ let a = _mm512_set1_epi8(0);
let b = _mm512_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
+ let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(
r,
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
@@ -14414,361 +15683,527 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpneq_epu8_mask() {
- let a = _mm256_set1_epi8(2);
+ unsafe fn test_mm256_cmp_epi8_mask() {
+ let a = _mm256_set1_epi8(0);
let b = _mm256_set1_epi8(1);
- let m = _mm256_cmpneq_epu8_mask(a, b);
+ let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111_11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
- let a = _mm256_set1_epi8(2);
+ unsafe fn test_mm256_mask_cmp_epi8_mask() {
+ let a = _mm256_set1_epi8(0);
let b = _mm256_set1_epi8(1);
let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
+ let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101_01010101_01010101);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpneq_epu8_mask() {
- let a = _mm_set1_epi8(2);
+ unsafe fn test_mm_cmp_epi8_mask() {
+ let a = _mm_set1_epi8(0);
let b = _mm_set1_epi8(1);
- let m = _mm_cmpneq_epu8_mask(a, b);
+ let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
assert_eq!(m, 0b11111111_11111111);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpneq_epu8_mask() {
- let a = _mm_set1_epi8(2);
+ unsafe fn test_mm_mask_cmp_epi8_mask() {
+ let a = _mm_set1_epi8(0);
let b = _mm_set1_epi8(1);
let mask = 0b01010101_01010101;
- let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
+ let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
assert_eq!(r, 0b01010101_01010101);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpneq_epi16_mask() {
- let a = _mm512_set1_epi16(1);
- let b = _mm512_set1_epi16(-1);
- let m = _mm512_cmpneq_epi16_mask(a, b);
- assert_eq!(m, 0b11111111_11111111_11111111_11111111);
- }
-
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
- let a = _mm512_set1_epi16(1);
- let b = _mm512_set1_epi16(-1);
- let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
- assert_eq!(r, 0b01010101_01010101_01010101_01010101);
- }
-
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpneq_epi16_mask() {
+ unsafe fn test_mm256_reduce_add_epi16() {
let a = _mm256_set1_epi16(1);
- let b = _mm256_set1_epi16(-1);
- let m = _mm256_cmpneq_epi16_mask(a, b);
- assert_eq!(m, 0b11111111_11111111);
+ let e = _mm256_reduce_add_epi16(a);
+ assert_eq!(16, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
+ unsafe fn test_mm256_mask_reduce_add_epi16() {
let a = _mm256_set1_epi16(1);
- let b = _mm256_set1_epi16(-1);
- let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
- assert_eq!(r, 0b01010101_01010101);
+ let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
+ assert_eq!(8, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpneq_epi16_mask() {
+ unsafe fn test_mm_reduce_add_epi16() {
let a = _mm_set1_epi16(1);
- let b = _mm_set1_epi16(-1);
- let m = _mm_cmpneq_epi16_mask(a, b);
- assert_eq!(m, 0b11111111);
+ let e = _mm_reduce_add_epi16(a);
+ assert_eq!(8, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpneq_epi16_mask() {
+ unsafe fn test_mm_mask_reduce_add_epi16() {
let a = _mm_set1_epi16(1);
- let b = _mm_set1_epi16(-1);
- let mask = 0b01010101;
- let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
- assert_eq!(r, 0b01010101);
+ let e = _mm_mask_reduce_add_epi16(0b11110000, a);
+ assert_eq!(4, e);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmpneq_epi8_mask() {
- let a = _mm512_set1_epi8(1);
- let b = _mm512_set1_epi8(-1);
- let m = _mm512_cmpneq_epi8_mask(a, b);
- assert_eq!(
- m,
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_add_epi8() {
+ let a = _mm256_set1_epi8(1);
+ let e = _mm256_reduce_add_epi8(a);
+ assert_eq!(32, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_add_epi8() {
+ let a = _mm256_set1_epi8(1);
+ let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
+ assert_eq!(16, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_add_epi8() {
+ let a = _mm_set1_epi8(1);
+ let e = _mm_reduce_add_epi8(a);
+ assert_eq!(16, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_add_epi8() {
+ let a = _mm_set1_epi8(1);
+ let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
+ assert_eq!(8, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_and_epi16() {
+ let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm256_reduce_and_epi16(a);
+ assert_eq!(0, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_and_epi16() {
+ let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_and_epi16() {
+ let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
+ let e = _mm_reduce_and_epi16(a);
+ assert_eq!(0, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_and_epi16() {
+ let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
+ let e = _mm_mask_reduce_and_epi16(0b11110000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_and_epi8() {
+ let a = _mm256_set_epi8(
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ 2, 2, 2,
);
+ let e = _mm256_reduce_and_epi8(a);
+ assert_eq!(0, e);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
- let a = _mm512_set1_epi8(1);
- let b = _mm512_set1_epi8(-1);
- let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
- assert_eq!(
- r,
- 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_and_epi8() {
+ let a = _mm256_set_epi8(
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ 2, 2, 2,
);
+ let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
+ assert_eq!(1, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmpneq_epi8_mask() {
- let a = _mm256_set1_epi8(1);
- let b = _mm256_set1_epi8(-1);
- let m = _mm256_cmpneq_epi8_mask(a, b);
- assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ unsafe fn test_mm_reduce_and_epi8() {
+ let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm_reduce_and_epi8(a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
- let a = _mm256_set1_epi8(1);
- let b = _mm256_set1_epi8(-1);
- let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
- assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ unsafe fn test_mm_mask_reduce_and_epi8() {
+ let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
+ assert_eq!(1, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmpneq_epi8_mask() {
- let a = _mm_set1_epi8(1);
- let b = _mm_set1_epi8(-1);
- let m = _mm_cmpneq_epi8_mask(a, b);
- assert_eq!(m, 0b11111111_11111111);
+ unsafe fn test_mm256_reduce_mul_epi16() {
+ let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
+ let e = _mm256_reduce_mul_epi16(a);
+ assert_eq!(256, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmpneq_epi8_mask() {
- let a = _mm_set1_epi8(1);
- let b = _mm_set1_epi8(-1);
- let mask = 0b01010101_01010101;
- let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
- assert_eq!(r, 0b01010101_01010101);
+ unsafe fn test_mm256_mask_reduce_mul_epi16() {
+ let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
+ assert_eq!(1, e);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmp_epu16_mask() {
- let a = _mm512_set1_epi16(0);
- let b = _mm512_set1_epi16(1);
- let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_mul_epi16() {
+ let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
+ let e = _mm_reduce_mul_epi16(a);
+ assert_eq!(16, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_mul_epi16() {
+ let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
+ let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_mul_epi8() {
+ let a = _mm256_set_epi8(
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2,
+ );
+ let e = _mm256_reduce_mul_epi8(a);
+ assert_eq!(64, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_mul_epi8() {
+ let a = _mm256_set_epi8(
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2,
+ );
+ let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_mul_epi8() {
+ let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
+ let e = _mm_reduce_mul_epi8(a);
+ assert_eq!(8, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_mul_epi8() {
+ let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
+ let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_max_epi16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i16 = _mm256_reduce_max_epi16(a);
+ assert_eq!(15, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_max_epi16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
+ assert_eq!(7, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_max_epi16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i16 = _mm_reduce_max_epi16(a);
+ assert_eq!(7, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_max_epi16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
+ assert_eq!(3, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_max_epi8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ );
+ let e: i8 = _mm256_reduce_max_epi8(a);
+ assert_eq!(31, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_max_epi8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ );
+ let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
+ assert_eq!(15, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_max_epi8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i8 = _mm_reduce_max_epi8(a);
+ assert_eq!(15, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_max_epi8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
+ assert_eq!(7, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_max_epu16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u16 = _mm256_reduce_max_epu16(a);
+ assert_eq!(15, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_max_epu16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
+ assert_eq!(7, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_max_epu16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u16 = _mm_reduce_max_epu16(a);
+ assert_eq!(7, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_max_epu16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
+ assert_eq!(3, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_max_epu8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ );
+ let e: u8 = _mm256_reduce_max_epu8(a);
+ assert_eq!(31, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_max_epu8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ );
+ let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
+ assert_eq!(15, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_max_epu8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u8 = _mm_reduce_max_epu8(a);
+ assert_eq!(15, e);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmp_epu16_mask() {
- let a = _mm512_set1_epi16(0);
- let b = _mm512_set1_epi16(1);
- let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_max_epu8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
+ assert_eq!(7, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmp_epu16_mask() {
- let a = _mm256_set1_epi16(0);
- let b = _mm256_set1_epi16(1);
- let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111);
+ unsafe fn test_mm256_reduce_min_epi16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i16 = _mm256_reduce_min_epi16(a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmp_epu16_mask() {
- let a = _mm256_set1_epi16(0);
- let b = _mm256_set1_epi16(1);
- let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101);
+ unsafe fn test_mm256_mask_reduce_min_epi16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmp_epu16_mask() {
- let a = _mm_set1_epi16(0);
- let b = _mm_set1_epi16(1);
- let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111);
+ unsafe fn test_mm_reduce_min_epi16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i16 = _mm_reduce_min_epi16(a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmp_epu16_mask() {
- let a = _mm_set1_epi16(0);
- let b = _mm_set1_epi16(1);
- let mask = 0b01010101;
- let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101);
+ unsafe fn test_mm_mask_reduce_min_epi16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i16 = _mm_mask_reduce_min_epi16(0b00001111, a);
+ assert_eq!(4, e); // lanes 0..=3 hold 7 down to 4; the unmasked minimum would be 0
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmp_epu8_mask() {
- let a = _mm512_set1_epi8(0);
- let b = _mm512_set1_epi8(1);
- let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(
- m,
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_min_epi8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
);
+ let e: i8 = _mm256_reduce_min_epi8(a);
+ assert_eq!(0, e);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmp_epu8_mask() {
- let a = _mm512_set1_epi8(0);
- let b = _mm512_set1_epi8(1);
- let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(
- r,
- 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_min_epi8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
);
+ let e: i8 = _mm256_mask_reduce_min_epi8(0b0000000000000000_1111111111111111, a);
+ assert_eq!(16, e); // lanes 0..=15 hold 31 down to 16; the unmasked minimum would be 0
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmp_epu8_mask() {
- let a = _mm256_set1_epi8(0);
- let b = _mm256_set1_epi8(1);
- let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ unsafe fn test_mm_reduce_min_epi8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i8 = _mm_reduce_min_epi8(a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmp_epu8_mask() {
- let a = _mm256_set1_epi8(0);
- let b = _mm256_set1_epi8(1);
- let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ unsafe fn test_mm_mask_reduce_min_epi8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: i8 = _mm_mask_reduce_min_epi8(0b00000000_11111111, a);
+ assert_eq!(8, e); // lanes 0..=7 hold 15 down to 8; the unmasked minimum would be 0
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmp_epu8_mask() {
- let a = _mm_set1_epi8(0);
- let b = _mm_set1_epi8(1);
- let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111);
+ unsafe fn test_mm256_reduce_min_epu16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u16 = _mm256_reduce_min_epu16(a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmp_epu8_mask() {
- let a = _mm_set1_epi8(0);
- let b = _mm_set1_epi8(1);
- let mask = 0b01010101_01010101;
- let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101);
+ unsafe fn test_mm256_mask_reduce_min_epu16() {
+ let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u16 = _mm256_mask_reduce_min_epu16(0b00000000_11111111, a);
+ assert_eq!(8, e); // lanes 0..=7 hold 15 down to 8; the unmasked minimum would be 0
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmp_epi16_mask() {
- let a = _mm512_set1_epi16(0);
- let b = _mm512_set1_epi16(1);
- let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_min_epu16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u16 = _mm_reduce_min_epu16(a);
+ assert_eq!(0, e);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmp_epi16_mask() {
- let a = _mm512_set1_epi16(0);
- let b = _mm512_set1_epi16(1);
- let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_min_epu16() {
+ let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u16 = _mm_mask_reduce_min_epu16(0b00001111, a);
+ assert_eq!(4, e); // lanes 0..=3 hold 7 down to 4; the unmasked minimum would be 0
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmp_epi16_mask() {
- let a = _mm256_set1_epi16(0);
- let b = _mm256_set1_epi16(1);
- let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111);
+ unsafe fn test_mm256_reduce_min_epu8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ );
+ let e: u8 = _mm256_reduce_min_epu8(a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmp_epi16_mask() {
- let a = _mm256_set1_epi16(0);
- let b = _mm256_set1_epi16(1);
- let mask = 0b01010101_01010101;
- let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101);
+ unsafe fn test_mm256_mask_reduce_min_epu8() {
+ let a = _mm256_set_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ );
+ let e: u8 = _mm256_mask_reduce_min_epu8(0b0000000000000000_1111111111111111, a);
+ assert_eq!(16, e); // lanes 0..=15 hold 31 down to 16; the unmasked minimum would be 0
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmp_epi16_mask() {
- let a = _mm_set1_epi16(0);
- let b = _mm_set1_epi16(1);
- let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111);
+ unsafe fn test_mm_reduce_min_epu8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u8 = _mm_reduce_min_epu8(a);
+ assert_eq!(0, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmp_epi16_mask() {
- let a = _mm_set1_epi16(0);
- let b = _mm_set1_epi16(1);
- let mask = 0b01010101;
- let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101);
+ unsafe fn test_mm_mask_reduce_min_epu8() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u8 = _mm_mask_reduce_min_epu8(0b00000000_11111111, a);
+ assert_eq!(8, e); // lanes 0..=7 hold 15 down to 8; the unmasked minimum would be 0
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_cmp_epi8_mask() {
- let a = _mm512_set1_epi8(0);
- let b = _mm512_set1_epi8(1);
- let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(
- m,
- 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
- );
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_reduce_or_epi16() {
+ let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm256_reduce_or_epi16(a);
+ assert_eq!(3, e);
}
- #[simd_test(enable = "avx512bw")]
- unsafe fn test_mm512_mask_cmp_epi8_mask() {
- let a = _mm512_set1_epi8(0);
- let b = _mm512_set1_epi8(1);
- let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
- let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(
- r,
- 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
- );
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm256_mask_reduce_or_epi16() {
+ let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
+ assert_eq!(1, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_cmp_epi8_mask() {
- let a = _mm256_set1_epi8(0);
- let b = _mm256_set1_epi8(1);
- let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111_11111111_11111111);
+ unsafe fn test_mm_reduce_or_epi16() {
+ let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
+ let e = _mm_reduce_or_epi16(a);
+ assert_eq!(3, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm256_mask_cmp_epi8_mask() {
- let a = _mm256_set1_epi8(0);
- let b = _mm256_set1_epi8(1);
- let mask = 0b01010101_01010101_01010101_01010101;
- let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101_01010101_01010101);
+ unsafe fn test_mm_mask_reduce_or_epi16() {
+ let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
+ let e = _mm_mask_reduce_or_epi16(0b11110000, a);
+ assert_eq!(1, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_cmp_epi8_mask() {
- let a = _mm_set1_epi8(0);
- let b = _mm_set1_epi8(1);
- let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
- assert_eq!(m, 0b11111111_11111111);
+ unsafe fn test_mm256_reduce_or_epi8() {
+ let a = _mm256_set_epi8(
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ 2, 2, 2,
+ );
+ let e = _mm256_reduce_or_epi8(a);
+ assert_eq!(3, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
- unsafe fn test_mm_mask_cmp_epi8_mask() {
- let a = _mm_set1_epi8(0);
- let b = _mm_set1_epi8(1);
- let mask = 0b01010101_01010101;
- let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
- assert_eq!(r, 0b01010101_01010101);
+ unsafe fn test_mm256_mask_reduce_or_epi8() {
+ let a = _mm256_set_epi8(
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ 2, 2, 2,
+ );
+ let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_reduce_or_epi8() {
+ let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm_reduce_or_epi8(a);
+ assert_eq!(3, e);
+ }
+
+ #[simd_test(enable = "avx512bw,avx512vl")]
+ unsafe fn test_mm_mask_reduce_or_epi8() {
+ let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
+ assert_eq!(1, e);
}
#[simd_test(enable = "avx512bw")]
@@ -18679,6 +20114,22 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_cvtmask32_u32() {
+ let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
+ let r = _cvtmask32_u32(a);
+ let e: u32 = 0b11001100_00110011_01100110_10011001; // same bit pattern as `a`
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_cvtu32_mask32() {
+ let a: u32 = 0b11001100_00110011_01100110_10011001;
+ let r = _cvtu32_mask32(a);
+ let e: __mmask32 = 0b11001100_00110011_01100110_10011001; // same bit pattern as `a`
+ assert_eq!(r, e);
+ }
+
#[simd_test(enable = "avx512bw")]
unsafe fn test_kadd_mask32() {
let a: __mmask32 = 11;
@@ -18820,6 +20271,160 @@ mod tests {
assert_eq!(r, e);
}
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kortest_mask32_u8() {
+ let a: __mmask32 = 0b0110100101101001_0110100101101001;
+ let b: __mmask32 = 0b1011011010110110_1011011010110110;
+ let mut all_ones: u8 = 0;
+ let r = _kortest_mask32_u8(a, b, &mut all_ones);
+ assert_eq!(r, 0); // a | b is non-zero
+ assert_eq!(all_ones, 1); // a | b == 0xFFFF_FFFF: every one of the 32 bits is set
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kortest_mask64_u8() {
+ let a: __mmask64 = 0b0110100101101001_0110100101101001;
+ let b: __mmask64 = 0b1011011010110110_1011011010110110;
+ let mut all_ones: u8 = 0;
+ let r = _kortest_mask64_u8(a, b, &mut all_ones);
+ assert_eq!(r, 0); // a | b is non-zero
+ assert_eq!(all_ones, 0); // a | b == 0x0000_0000_FFFF_FFFF: the upper 32 bits are clear
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kortestc_mask32_u8() {
+ let a: __mmask32 = 0b0110100101101001_0110100101101001;
+ let b: __mmask32 = 0b1011011010110110_1011011010110110;
+ let r = _kortestc_mask32_u8(a, b);
+ assert_eq!(r, 1); // a | b == 0xFFFF_FFFF: every one of the 32 bits is set
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kortestc_mask64_u8() {
+ let a: __mmask64 = 0b0110100101101001_0110100101101001;
+ let b: __mmask64 = 0b1011011010110110_1011011010110110;
+ let r = _kortestc_mask64_u8(a, b);
+ assert_eq!(r, 0); // a | b == 0x0000_0000_FFFF_FFFF: the upper 32 bits are clear
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kortestz_mask32_u8() {
+ let a: __mmask32 = 0b0110100101101001_0110100101101001;
+ let b: __mmask32 = 0b1011011010110110_1011011010110110;
+ let r = _kortestz_mask32_u8(a, b);
+ assert_eq!(r, 0); // a | b == 0xFFFF_FFFF, which is non-zero
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kortestz_mask64_u8() {
+ let a: __mmask64 = 0b0110100101101001_0110100101101001;
+ let b: __mmask64 = 0b1011011010110110_1011011010110110;
+ let r = _kortestz_mask64_u8(a, b);
+ assert_eq!(r, 0); // a | b == 0x0000_0000_FFFF_FFFF, which is non-zero
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kshiftli_mask32() {
+ let a: __mmask32 = 0b0110100101101001_0110100101101001;
+ let r = _kshiftli_mask32::<3>(a);
+ let e: __mmask32 = 0b0100101101001011_0100101101001000; // a << 3, top three bits dropped
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kshiftli_mask64() {
+ let a: __mmask64 = 0b0110100101101001_0110100101101001;
+ let r = _kshiftli_mask64::<3>(a);
+ let e: __mmask64 = 0b0110100101101001011_0100101101001000; // a << 3, no bits are lost
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kshiftri_mask32() {
+ let a: __mmask32 = 0b0110100101101001_0110100101101001;
+ let r = _kshiftri_mask32::<3>(a);
+ let e: __mmask32 = 0b0000110100101101_0010110100101101; // a >> 3, zeros shifted in
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_kshiftri_mask64() {
+ let a: __mmask64 = 0b0110100101101001011_0100101101001000;
+ let r = _kshiftri_mask64::<3>(a);
+ let e: __mmask64 = 0b0110100101101001_0110100101101001; // a >> 3 (low three bits of a are 0)
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_ktest_mask32_u8() {
+ let a: __mmask32 = 0b0110100100111100_0110100100111100;
+ let b: __mmask32 = 0b1001011011000011_1001011011000011;
+ let mut and_not: u8 = 0;
+ let r = _ktest_mask32_u8(a, b, &mut and_not);
+ assert_eq!(r, 1); // b is the 32-bit complement of a, so a & b == 0
+ assert_eq!(and_not, 0); // !a & b == b, which is non-zero
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_ktestc_mask32_u8() {
+ let a: __mmask32 = 0b0110100100111100_0110100100111100;
+ let b: __mmask32 = 0b1001011011000011_1001011011000011;
+ let r = _ktestc_mask32_u8(a, b);
+ assert_eq!(r, 0); // b is the 32-bit complement of a, so !a & b == b, which is non-zero
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_ktestz_mask32_u8() {
+ let a: __mmask32 = 0b0110100100111100_0110100100111100;
+ let b: __mmask32 = 0b1001011011000011_1001011011000011;
+ let r = _ktestz_mask32_u8(a, b);
+ assert_eq!(r, 1); // b is the 32-bit complement of a, so a & b == 0
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_ktest_mask64_u8() {
+ let a: __mmask64 = 0b0110100100111100_0110100100111100;
+ let b: __mmask64 = 0b1001011011000011_1001011011000011;
+ let mut and_not: u8 = 0;
+ let r = _ktest_mask64_u8(a, b, &mut and_not);
+ assert_eq!(r, 1); // a and b share no set bit, so a & b == 0
+ assert_eq!(and_not, 0); // !a & b == b, which is non-zero
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_ktestc_mask64_u8() {
+ let a: __mmask64 = 0b0110100100111100_0110100100111100;
+ let b: __mmask64 = 0b1001011011000011_1001011011000011;
+ let r = _ktestc_mask64_u8(a, b);
+ assert_eq!(r, 0); // a and b share no set bit, so !a & b == b, which is non-zero
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_ktestz_mask64_u8() {
+ let a: __mmask64 = 0b0110100100111100_0110100100111100;
+ let b: __mmask64 = 0b1001011011000011_1001011011000011;
+ let r = _ktestz_mask64_u8(a, b);
+ assert_eq!(r, 1); // a and b share no set bit, so a & b == 0
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_kunpackw() {
+ let a: u32 = 0x00110011;
+ let b: u32 = 0x00001011;
+ let r = _mm512_kunpackw(a, b);
+ let e: u32 = 0x00111011; // low 16 bits of a (0x0011) above low 16 bits of b (0x1011)
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_kunpackd() {
+ let a: u64 = 0x11001100_00110011;
+ let b: u64 = 0x00101110_00001011;
+ let r = _mm512_kunpackd(a, b);
+ let e: u64 = 0x00110011_00001011; // low 32 bits of a above low 32 bits of b
+ assert_eq!(r, e);
+ }
+
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_cvtepi16_epi8() {
let a = _mm512_set1_epi16(2);
diff --git a/crates/core_arch/src/x86_64/avx512bw.rs b/crates/core_arch/src/x86_64/avx512bw.rs
new file mode 100644
index 0000000000..798fc4adf6
--- /dev/null
+++ b/crates/core_arch/src/x86_64/avx512bw.rs
@@ -0,0 +1,45 @@
+use crate::core_arch::x86::*;
+
+/// Convert 64-bit mask `a` into an integer value and return it; the bits are passed through unchanged.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask64_u64)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _cvtmask64_u64(a: __mmask64) -> u64 {
+ a
+}
+
+/// Convert integer value `a` into a 64-bit mask and return it; the bits are passed through unchanged.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu64_mask64)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _cvtu64_mask64(a: u64) -> __mmask64 {
+ a
+}
+
+#[cfg(test)]
+mod tests {
+
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::{x86::*, x86_64::*};
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_cvtmask64_u64() {
+ let a: __mmask64 = 0xCC33_6699_33CC_9966; // bits 32..64 are used, so truncation is caught
+ let r = _cvtmask64_u64(a);
+ let e: u64 = 0xCC33_6699_33CC_9966;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_cvtu64_mask64() {
+ let a: u64 = 0xCC33_6699_33CC_9966; // bits 32..64 are used, so truncation is caught
+ let r = _cvtu64_mask64(a);
+ let e: __mmask64 = 0xCC33_6699_33CC_9966;
+ assert_eq!(r, e);
+ }
+}
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index ff46373d90..fb7bce6871 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -46,6 +46,10 @@ mod avx512f;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub use self::avx512f::*;
+mod avx512bw;
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub use self::avx512bw::*;
+
mod bswap;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use self::bswap::*;