Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d2a2fd3
forward `TEST_SAMPLE_INTRINSICS_PERCENTAGE`
folkertdev Oct 26, 2025
d618085
intrinsic-test: display more logs in CI
folkertdev Oct 26, 2025
48116cf
Add AMX intrinsics
sayantn Apr 6, 2025
2815023
Patch stdarch_verify to not check intel definition for new AMX intrin…
sayantn Nov 1, 2025
17c3f8a
Add tests for new AMX intrinsics
sayantn Nov 1, 2025
f9dc790
improve `_mm256_permute2f128` tests
folkertdev Nov 2, 2025
ff78b84
Merge pull request #1951 from folkertdev/permute2f128-tests
sayantn Nov 3, 2025
8dc0761
Merge pull request #1948 from folkertdev/forward-percentage
Amanieu Nov 4, 2025
5914877
Merge pull request #1846 from sayantn/new-amx-intrinsics
Amanieu Nov 4, 2025
9126145
Use generic SIMD masked load/stores for avx512 masked load/stores
sayantn Oct 10, 2025
7ea8483
Use generic SIMD intrinsics for AVX `maskload` and `maskstore` intrin…
sayantn Nov 6, 2025
83e4d81
Prepare for merging from rust-lang/rust
invalid-email-address Nov 10, 2025
485ea04
Merge ref '8401398e1f14' from rust-lang/rust
invalid-email-address Nov 10, 2025
c610b1d
Merge pull request #1954 from rust-lang/rustc-pull
sayantn Nov 10, 2025
1b3abfe
rename default branch to main
marcoieni Nov 11, 2025
148a750
add logic tests for ternarylogic
folkertdev Nov 11, 2025
b851387
Merge pull request #1958 from folkertdev/ternary-logic-tests
sayantn Nov 11, 2025
e94ac6b
improve ternary logic tests
folkertdev Nov 11, 2025
849ace0
Merge pull request #1953 from sayantn/masked-load-store
folkertdev Nov 12, 2025
0ab7c9e
Use SIMD intrinsics for vector shifts
sayantn Nov 12, 2025
84c44c4
Merge pull request #1959 from folkertdev/ternary-logic-tests-2
folkertdev Nov 13, 2025
d84c695
Merge pull request #1955 from sayantn/vector-shifts
folkertdev Nov 13, 2025
0882a6e
aarch64: Remove withdrawn FEAT_TME
maurer Nov 14, 2025
8fe87e9
correct some `#[simd_test]` attributes
sayantn Nov 10, 2025
7f3fcb3
Merge pull request #1956 from sayantn/fix-tests
folkertdev Nov 16, 2025
57436fe
Merge pull request #1960 from maurer/remove-tme
folkertdev Nov 17, 2025
ac2d972
correct signedness of pmadd arguments
folkertdev Nov 17, 2025
2788686
Merge pull request #1957 from marcoieni/rename-default-branch-to-main
marcoieni Nov 17, 2025
2922cbd
Merge pull request #1961 from folkertdev/pmadd-correct-signedness
sayantn Nov 17, 2025
ae682a3
enable `avx10_target_feature` in core (used by stdarch)
folkertdev Nov 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions library/stdarch/crates/core_arch/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,17 @@ macro_rules! simd_extract {
($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }};
($x:expr, $idx:expr, $ty:ty $(,)?) => {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }};
}

/// Calls the generic `simd_masked_load` intrinsic with the alignment written as an
/// ordinary leading argument: `simd_masked_load!(align, mask, ptr, default)`.
///
/// The intrinsic takes its alignment as a const generic argument; this wrapper splices
/// `$align` into that position and leaves the three type parameters to inference.
///
/// * `$align`   - constant expression used as the const generic argument.
/// * `$mask`    - forwarded as the intrinsic's first argument.
/// * `$ptr`     - forwarded as the intrinsic's second argument.
/// * `$default` - forwarded as the intrinsic's third argument.
///
/// A trailing comma is accepted, matching `simd_extract!`.
// NOTE(review): `allow(unused)` is presumably here because not every target that
// compiles this module invokes the macro — confirm.
#[allow(unused)]
macro_rules! simd_masked_load {
    ($align:expr, $mask:expr, $ptr:expr, $default:expr $(,)?) => {
        $crate::intrinsics::simd::simd_masked_load::<_, _, _, { $align }>($mask, $ptr, $default)
    };
}

/// Calls the generic `simd_masked_store` intrinsic with the alignment written as an
/// ordinary leading argument: `simd_masked_store!(align, mask, ptr, val)`.
///
/// The intrinsic takes its alignment as a const generic argument; this wrapper splices
/// `$align` into that position and leaves the three type parameters to inference.
///
/// * `$align` - constant expression used as the const generic argument.
/// * `$mask`  - forwarded as the intrinsic's first argument.
/// * `$ptr`   - forwarded as the intrinsic's second argument.
/// * `$val`   - the vector to store, forwarded as the intrinsic's third argument.
///
/// A trailing comma is accepted, matching `simd_extract!`.
// NOTE(review): `allow(unused)` is presumably here because not every target that
// compiles this module invokes the macro — confirm.
#[allow(unused)]
macro_rules! simd_masked_store {
    ($align:expr, $mask:expr, $ptr:expr, $val:expr $(,)?) => {
        $crate::intrinsics::simd::simd_masked_store::<_, _, _, { $align }>($mask, $ptr, $val)
    };
}
40 changes: 16 additions & 24 deletions library/stdarch/crates/core_arch/src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,8 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadpd256`); the two lines after it are its replacement.
maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
// Unaligned masked load from `mem_addr`, with a zero vector as the default operand.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_pd())
}

/// Stores packed double-precision (64-bit) floating-point elements from `a`
Expand All @@ -1687,7 +1688,8 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstorepd256`); the two lines after it are its replacement.
maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
// Unaligned masked store of `a` to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Loads packed double-precision (64-bit) floating-point elements from memory
Expand All @@ -1700,7 +1702,8 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d)
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadpd`); the two lines after it are its replacement.
maskloadpd(mem_addr as *const i8, mask.as_i64x2())
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
// Unaligned masked load from `mem_addr`, with a zero vector as the default operand.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_pd())
}

/// Stores packed double-precision (64-bit) floating-point elements from `a`
Expand All @@ -1712,7 +1715,8 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstorepd`); the two lines after it are its replacement.
maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
// Unaligned masked store of `a` to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Loads packed single-precision (32-bit) floating-point elements from memory
Expand All @@ -1725,7 +1729,8 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadps256`); the two lines after it are its replacement.
maskloadps256(mem_addr as *const i8, mask.as_i32x8())
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
// Unaligned masked load from `mem_addr`, with a zero vector as the default operand.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_ps())
}

/// Stores packed single-precision (32-bit) floating-point elements from `a`
Expand All @@ -1737,7 +1742,8 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstoreps256`); the two lines after it are its replacement.
maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
// Unaligned masked store of `a` to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Loads packed single-precision (32-bit) floating-point elements from memory
Expand All @@ -1750,7 +1756,8 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256)
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadps`); the two lines after it are its replacement.
maskloadps(mem_addr as *const i8, mask.as_i32x4())
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
// Unaligned masked load from `mem_addr`, with a zero vector as the default operand.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_ps())
}

/// Stores packed single-precision (32-bit) floating-point elements from `a`
Expand All @@ -1762,7 +1769,8 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstoreps`); the two lines after it are its replacement.
maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
// Unaligned masked store of `a` to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
Expand Down Expand Up @@ -3147,22 +3155,6 @@ unsafe extern "C" {
fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
#[link_name = "llvm.x86.avx.vpermilvar.pd"]
fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
#[link_name = "llvm.x86.avx.maskload.pd.256"]
fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
#[link_name = "llvm.x86.avx.maskstore.pd.256"]
fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
#[link_name = "llvm.x86.avx.maskload.pd"]
fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
#[link_name = "llvm.x86.avx.maskstore.pd"]
fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
#[link_name = "llvm.x86.avx.maskload.ps.256"]
fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
#[link_name = "llvm.x86.avx.maskstore.ps.256"]
fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
#[link_name = "llvm.x86.avx.maskload.ps"]
fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
#[link_name = "llvm.x86.avx.maskstore.ps"]
fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
#[link_name = "llvm.x86.avx.ldu.dq.256"]
fn vlddqu(mem_addr: *const i8) -> i8x32;
#[link_name = "llvm.x86.avx.rcp.ps.256"]
Expand Down
40 changes: 16 additions & 24 deletions library/stdarch/crates/core_arch/src/x86/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1786,7 +1786,8 @@ pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadd`); the two lines after it are its replacement.
transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
// Unaligned masked load with `i32x4::ZERO` as the default operand; the result is
// converted back to `__m128i` with `as_m128i()`.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}

/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
Expand All @@ -1799,7 +1800,8 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadd256`); the two lines after it are its replacement.
transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
// Unaligned masked load with `i32x8::ZERO` as the default operand; the result is
// converted back to `__m256i` with `as_m256i()`.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}

/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
Expand All @@ -1812,7 +1814,8 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadq`); the two lines after it are its replacement.
transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
// Unaligned masked load with `i64x2::ZERO` as the default operand; the result is
// converted back to `__m128i` with `as_m128i()`.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}

/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
Expand All @@ -1825,7 +1828,8 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskloadq256`); the two lines after it are its replacement.
transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
// Unaligned masked load with `i64x4::ZERO` as the default operand; the result is
// converted back to `__m256i` with `as_m256i()`.
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}

/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1838,7 +1842,8 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstored`); the two lines after it are its replacement.
maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
// Unaligned masked store of `a`, viewed as four 32-bit lanes, to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}

/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1851,7 +1856,8 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i)
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstored256`); the two lines after it are its replacement.
maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
// Shift every 32-bit lane right by 31 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
// Unaligned masked store of `a`, viewed as eight 32-bit lanes, to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}

/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1864,7 +1870,8 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstoreq`); the two lines after it are its replacement.
maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
// Unaligned masked store of `a`, viewed as two 64-bit lanes, to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}

/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1877,7 +1884,8 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i)
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
// Diff view without +/- markers: the next line is the old LLVM-binding call
// (`maskstoreq256`); the two lines after it are its replacement.
maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
// Shift every 64-bit lane right by 63 so the lane depends on its top bit alone.
// NOTE(review): assumes `simd_shr` on signed lanes is an arithmetic shift, leaving
// each lane all-ones or all-zeros — confirm against the intrinsic's docs.
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
// Unaligned masked store of `a`, viewed as four 64-bit lanes, to `mem_addr`.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
Expand Down Expand Up @@ -3645,22 +3653,6 @@ unsafe extern "C" {
fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
#[link_name = "llvm.x86.avx2.maskload.d"]
fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.maskload.d.256"]
fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.maskload.q"]
fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.maskload.q.256"]
fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.maskstore.d"]
fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
#[link_name = "llvm.x86.avx2.maskstore.d.256"]
fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
#[link_name = "llvm.x86.avx2.maskstore.q"]
fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
#[link_name = "llvm.x86.avx2.mpsadbw"]
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
Expand Down
Loading