diff --git a/coresimd/ppsv/api/minimal.rs b/coresimd/ppsv/api/minimal.rs index 4470bd6c31..fd1ffdb82d 100644 --- a/coresimd/ppsv/api/minimal.rs +++ b/coresimd/ppsv/api/minimal.rs @@ -81,6 +81,10 @@ macro_rules! impl_minimal { simd_insert(self, index as u32, new_value) } } + + impl super::api::Simd for $id { + type Element = $elem_ty; + } } } diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs index 857264b9f3..931ed5ccf6 100644 --- a/coresimd/ppsv/api/mod.rs +++ b/coresimd/ppsv/api/mod.rs @@ -112,9 +112,8 @@ mod partial_eq; // TODO: //#[macro_use] //mod partial_ord; -// TODO: -//#[macro_use] -//mod shuffles; +#[macro_use] +pub mod shuffles; // TODO: //#[macro_use] //mod gather_scatter; @@ -125,9 +124,15 @@ mod scalar_shifts; #[macro_use] mod shifts; -/// Sealed trait used for constraining select implementations. +/// Sealed trait used to constrain select implementations. pub trait Lanes {} +/// Sealed trait used to constrain vector shuffles. +pub trait Simd { + /// Element type of the SIMD vector. + type Element; +} + /// Defines a portable packed SIMD floating-point vector type. macro_rules! simd_f_ty { ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, diff --git a/coresimd/ppsv/api/shuffles.rs b/coresimd/ppsv/api/shuffles.rs new file mode 100644 index 0000000000..3bd5bd3d76 --- /dev/null +++ b/coresimd/ppsv/api/shuffles.rs @@ -0,0 +1,393 @@ +//! Shuffle vectors +#![allow(unused)] + +macro_rules! impl_shuffle { + ($_e:expr) => { + use super::Simd; + + /// This trait is not public. + /// + /// It is only used to constrain the return + /// type of the vector shuffles. + pub trait Shuffle { + /// The result type of the shuffle. + type Output; + } + + // These implementations for the native + // types allow constraining the shuffles for + // each portable vector type by just implementing + // the Simd trait once for them. 
+ + impl Shuffle<[u32; 2]> for i8 { + type Output = super::super::i8x2; + } + impl Shuffle<[u32; 4]> for i8 { + type Output = super::super::i8x4; + } + impl Shuffle<[u32; 8]> for i8 { + type Output = super::super::i8x8; + } + impl Shuffle<[u32; 16]> for i8 { + type Output = super::super::i8x16; + } + impl Shuffle<[u32; 32]> for i8 { + type Output = super::super::i8x32; + } + impl Shuffle<[u32; 64]> for i8 { + type Output = super::super::i8x64; + } + impl Shuffle<[u32; 2]> for u8 { + type Output = super::super::u8x2; + } + impl Shuffle<[u32; 4]> for u8 { + type Output = super::super::u8x4; + } + impl Shuffle<[u32; 8]> for u8 { + type Output = super::super::u8x8; + } + impl Shuffle<[u32; 16]> for u8 { + type Output = super::super::u8x16; + } + impl Shuffle<[u32; 32]> for u8 { + type Output = super::super::u8x32; + } + impl Shuffle<[u32; 64]> for u8 { + type Output = super::super::u8x64; + } + + impl Shuffle<[u32; 2]> for i16 { + type Output = super::super::i16x2; + } + impl Shuffle<[u32; 4]> for i16 { + type Output = super::super::i16x4; + } + impl Shuffle<[u32; 8]> for i16 { + type Output = super::super::i16x8; + } + impl Shuffle<[u32; 16]> for i16 { + type Output = super::super::i16x16; + } + impl Shuffle<[u32; 32]> for i16 { + type Output = super::super::i16x32; + } + impl Shuffle<[u32; 2]> for u16 { + type Output = super::super::u16x2; + } + impl Shuffle<[u32; 4]> for u16 { + type Output = super::super::u16x4; + } + impl Shuffle<[u32; 8]> for u16 { + type Output = super::super::u16x8; + } + impl Shuffle<[u32; 16]> for u16 { + type Output = super::super::u16x16; + } + impl Shuffle<[u32; 32]> for u16 { + type Output = super::super::u16x32; + } + + impl Shuffle<[u32; 2]> for i32 { + type Output = super::super::i32x2; + } + impl Shuffle<[u32; 4]> for i32 { + type Output = super::super::i32x4; + } + impl Shuffle<[u32; 8]> for i32 { + type Output = super::super::i32x8; + } + impl Shuffle<[u32; 16]> for i32 { + type Output = super::super::i32x16; + } + impl 
Shuffle<[u32; 2]> for u32 { + type Output = super::super::u32x2; + } + impl Shuffle<[u32; 4]> for u32 { + type Output = super::super::u32x4; + } + impl Shuffle<[u32; 8]> for u32 { + type Output = super::super::u32x8; + } + impl Shuffle<[u32; 16]> for u32 { + type Output = super::super::u32x16; + } + impl Shuffle<[u32; 2]> for f32 { + type Output = super::super::f32x2; + } + impl Shuffle<[u32; 4]> for f32 { + type Output = super::super::f32x4; + } + impl Shuffle<[u32; 8]> for f32 { + type Output = super::super::f32x8; + } + impl Shuffle<[u32; 16]> for f32 { + type Output = super::super::f32x16; + } + + impl Shuffle<[u32; 2]> for i64 { + type Output = super::super::i64x2; + } + impl Shuffle<[u32; 4]> for i64 { + type Output = super::super::i64x4; + } + impl Shuffle<[u32; 8]> for i64 { + type Output = super::super::i64x8; + } + impl Shuffle<[u32; 2]> for u64 { + type Output = super::super::u64x2; + } + impl Shuffle<[u32; 4]> for u64 { + type Output = super::super::u64x4; + } + impl Shuffle<[u32; 8]> for u64 { + type Output = super::super::u64x8; + } + impl Shuffle<[u32; 2]> for f64 { + type Output = super::super::f64x2; + } + impl Shuffle<[u32; 4]> for f64 { + type Output = super::super::f64x4; + } + impl Shuffle<[u32; 8]> for f64 { + type Output = super::super::f64x8; + } + + /// The shuffle intrinsics are reimported here. + /// + /// At typeck both input vector types are required to be equal and thus + /// have the same length, the arrays of indices are required to have the + /// correct lengths, and the result type is constrained by the `where` + /// clauses below such that only the correct result types can type check. + /// + /// FIXME: The only way to produce a monomorphization-time error here is + /// to pass the intrinsic an element index that is out-of-bounds. Fixing + /// this probably requires checking that the indices are in-bounds in + /// MIR typeck. 
+ mod intrinsics { + use super::{Simd, Shuffle}; + extern "platform-intrinsic" { + pub fn simd_shuffle2(a: T, b: T, indices: [u32; 2]) -> U + where ::Element: Shuffle<[u32; 2], Output = U>; + + pub fn simd_shuffle4(a: T, b: T, indices: [u32; 4]) -> U + where ::Element: Shuffle<[u32; 4], Output = U>; + + pub fn simd_shuffle8(a: T, b: T, indices: [u32; 8]) -> U + where ::Element: Shuffle<[u32; 8], Output = U>; + + pub fn simd_shuffle16(a: T, b: T, indices: [u32; 16]) -> U + where ::Element: Shuffle<[u32; 16], Output = U>; + + pub fn simd_shuffle32(a: T, b: T, indices: [u32; 32]) -> U + where ::Element: Shuffle<[u32; 32], Output = U>; + + pub fn simd_shuffle64(a: T, b: T, indices: [u32; 64]) -> U + where ::Element: Shuffle<[u32; 64], Output = U>; + } + } + + pub use self::intrinsics::simd_shuffle2 as __shuffle_vector2; + pub use self::intrinsics::simd_shuffle4 as __shuffle_vector4; + pub use self::intrinsics::simd_shuffle8 as __shuffle_vector8; + pub use self::intrinsics::simd_shuffle16 as __shuffle_vector16; + pub use self::intrinsics::simd_shuffle32 as __shuffle_vector32; + pub use self::intrinsics::simd_shuffle64 as __shuffle_vector64; + } +} + +vector_impl!([impl_shuffle, 0]); + +/// Shuffles vector elements. +/// +/// This macro returns a new vector that contains a shuffle of the elements in +/// one or two input vectors: +/// +/// * `shuffle!(vec, [indices...])`: one-vector version +/// * `shuffle!(vec0, vec1, [indices...])`: two-vector version +/// +/// In the two-vector version both `vec0` and `vec1` must have the same type. +/// The element type of the resulting vector is the element type of the input +/// vector. +/// +/// The number of `indices` must be a power-of-two in range `[2, 64]` no larger +/// than two times the number of lanes in the input vector. The length of the +/// resulting vector equals the number of indices provided. +/// +/// Given a vector with `N` lanes, the indices in range `[0, N)` refer to the +/// `N` elements in the vector. 
In the two-vector version, the indices in range +/// `[N, 2*N)` refer to elements in the second vector. +/// +/// # Examples +/// +/// ``` +/// # #![cfg_attr(not(dox), feature(stdsimd))] +/// # #![cfg_attr(not(dox), no_std)] +/// # #[cfg(not(dox))] +/// # extern crate std as real_std; +/// # #[cfg(not(dox))] +/// # #[macro_use] +/// # extern crate stdsimd as std; +/// # use std::simd::*; +/// # fn main() { +/// // Shuffle allows reordering the elements of a vector: +/// let x = i32x4::new(1, 2, 3, 4); +/// let r = shuffle!(x, [2, 1, 3, 0]); +/// assert_eq!(r, i32x4::new(3, 2, 4, 1)); +/// +/// // The resulting vector can be smaller than the input: +/// let r = shuffle!(x, [1, 3]); +/// assert_eq!(r, i32x2::new(2, 4)); +/// +/// // Equal: +/// let r = shuffle!(x, [1, 3, 2, 0]); +/// assert_eq!(r, i32x4::new(2, 4, 3, 1)); +/// +/// // Or larger: +/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]); +/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3)); +/// // At most 2 * the number of lanes in the input vector. +/// +/// // It also allows reordering elements of two vectors: +/// let y = i32x4::new(5, 6, 7, 8); +/// let r = shuffle!(x, y, [4, 0, 5, 1]); +/// assert_eq!(r, i32x4::new(5, 1, 6, 2)); +/// // And this can be used to construct larger or smaller +/// // vectors as well. +/// # } +/// ``` +#[macro_export] +macro_rules! 
shuffle { + ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => { + { + #[allow(unused_unsafe)] + let r = unsafe { + $crate::simd::__shuffle_vector2( + $vec0, $vec1, + [$l0, $l1] + ) + }; + r + } + }; + ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => { + { + #[allow(unused_unsafe)] + let r = unsafe { + $crate::simd::__shuffle_vector4( + $vec0, $vec1, + [$l0, $l1, $l2, $l3] + ) + }; + r + } + }; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => { + { + #[allow(unused_unsafe)] + let r = unsafe { + $crate::simd::__shuffle_vector8( + $vec0, $vec1, + [$l0, $l1, $l2, $l3, + $l4, $l5, $l6, $l7] + ) + }; + r + } + }; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => { + { + #[allow(unused_unsafe)] + let r = unsafe { + $crate::simd::__shuffle_vector16( + $vec0, $vec1, + [$l0, $l1, $l2, $l3, + $l4, $l5, $l6, $l7, + $l8, $l9, $l10, $l11, + $l12, $l13, $l14, $l15] + ) + }; + r + } + }; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr, + $l16:expr, $l17:expr, $l18:expr, $l19:expr, + $l20:expr, $l21:expr, $l22:expr, $l23:expr, + $l24:expr, $l25:expr, $l26:expr, $l27:expr, + $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => { + { + #[allow(unused_unsafe)] + let r = unsafe { + $crate::simd::__shuffle_vector32( + $vec0, $vec1, + [$l0, $l1, $l2, $l3, + $l4, $l5, $l6, $l7, + $l8, $l9, $l10, $l11, + $l12, $l13, $l14, $l15, + $l16, $l17, $l18, $l19, + $l20, $l21, $l22, $l23, + $l24, $l25, $l26, $l27, + $l28, $l29, $l30, $l31] + ) + }; + r + } + }; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, 
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr, + $l16:expr, $l17:expr, $l18:expr, $l19:expr, + $l20:expr, $l21:expr, $l22:expr, $l23:expr, + $l24:expr, $l25:expr, $l26:expr, $l27:expr, + $l28:expr, $l29:expr, $l30:expr, $l31:expr, + $l32:expr, $l33:expr, $l34:expr, $l35:expr, + $l36:expr, $l37:expr, $l38:expr, $l39:expr, + $l40:expr, $l41:expr, $l42:expr, $l43:expr, + $l44:expr, $l45:expr, $l46:expr, $l47:expr, + $l48:expr, $l49:expr, $l50:expr, $l51:expr, + $l52:expr, $l53:expr, $l54:expr, $l55:expr, + $l56:expr, $l57:expr, $l58:expr, $l59:expr, + $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => { + { + #[allow(unused_unsafe)] + let r = unsafe { + $crate::simd::__shuffle_vector64( + $vec0, $vec1, + [$l0, $l1, $l2, $l3, + $l4, $l5, $l6, $l7, + $l8, $l9, $l10, $l11, + $l12, $l13, $l14, $l15, + $l16, $l17, $l18, $l19, + $l20, $l21, $l22, $l23, + $l24, $l25, $l26, $l27, + $l28, $l29, $l30, $l31, + $l32, $l33, $l34, $l35, + $l36, $l37, $l38, $l39, + $l40, $l41, $l42, $l43, + $l44, $l45, $l46, $l47, + $l48, $l49, $l50, $l51, + $l52, $l53, $l54, $l55, + $l56, $l57, $l58, $l59, + $l60, $l61, $l62, $l63] + ) + }; + r + } + }; + ($vec:expr, [$($l:expr),*]) => { + match $vec { + v => shuffle!(v, v, [$($l),*]) + } + } +} diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs index 4d5c92dad0..c6ef497060 100644 --- a/coresimd/ppsv/mod.rs +++ b/coresimd/ppsv/mod.rs @@ -86,3 +86,21 @@ impl FromBits for T { /// Work arounds code generation issues. mod codegen; + +/// Exposes private shuffle intrinsics +/// used by the `shuffle!` macro. +#[allow(unused)] +macro_rules! 
expose_shuffles { + ($_e:expr) => { + pub use self::api::shuffles::{ + __shuffle_vector2, + __shuffle_vector4, + __shuffle_vector8, + __shuffle_vector16, + __shuffle_vector32, + __shuffle_vector64, + }; + } +} + +vector_impl!([expose_shuffles, 0]); diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs index c83c2d4b35..2ba3944bd4 100644 --- a/coresimd/simd_llvm.rs +++ b/coresimd/simd_llvm.rs @@ -15,6 +15,8 @@ extern "platform-intrinsic" { pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U; pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U; pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U; + pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U; + pub fn simd_shuffle128(x: T, y: T, idx: [u32; 128]) -> U; pub fn simd_insert(x: T, idx: u32, val: U) -> T; pub fn simd_extract(x: T, idx: u32) -> U; diff --git a/crates/coresimd/tests/shuffles.rs b/crates/coresimd/tests/shuffles.rs new file mode 100644 index 0000000000..a5d7ba95b1 --- /dev/null +++ b/crates/coresimd/tests/shuffles.rs @@ -0,0 +1,242 @@ +#![feature(stdsimd)] + +#[macro_use] +extern crate coresimd; + +use coresimd::simd::*; + +#[test] +fn shuffle2() { + let x = u8x2::new(3, 42); + let e = u8x2::new(42, 3); + let r = shuffle!(x, [1, 0]); + assert_eq!(r, e); + + let y = u8x2::new(7, 12); + let e = u8x2::new(42, 12); + let r = shuffle!(x, y, [1, 3]); + assert_eq!(r, e); + + let x = i16x4::new(1, 2, 3, 4); + let e = i16x2::new(2, 4); + let r = shuffle!(x, [1, 3]); + assert_eq!(r, e); + + let y = i16x4::new(5, 6, 7, 8); + let e = i16x2::new(2, 7); + let r = shuffle!(x, y, [1, 6]); + assert_eq!(r, e); +} + +#[test] +fn shuffle4() { + let x = u8x2::new(3, 42); + let e = u8x4::new(42, 3, 42, 42); + let r = shuffle!(x, [1, 0, 1, 1]); + assert_eq!(r, e); + + let x = u32x4::new(1, 2, 3, 4); + let e = u32x4::new(2, 4, 1, 3); + let r = shuffle!(x, [1, 3, 0, 2]); + assert_eq!(r, e); + + let y = u32x4::new(5, 6, 7, 8); + let e = u32x4::new(3, 2, 6, 1); + let r = shuffle!(x, y, [2, 1, 5, 0]); + 
assert_eq!(r, e); + + let x = i32x8::new(1, 2, 3, 4, 7, 3, 2, 1); + let e = i32x4::new(2, 7, 3, 3); + let r = shuffle!(x, [1, 4, 5, 5]); + assert_eq!(r, e); + + let y = i32x8::new(5, 6, 7, 8, 1, 5, 2, 3); + let e = i32x4::new(3, 5, 7, 3); + let r = shuffle!(x, y, [15, 13, 4, 5]); + assert_eq!(r, e); +} + +#[test] +fn shuffle8() { + let x = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let e = f32x8::new(2., 8., 1., 3., 5., 2., 7., 4.); + let r = shuffle!(x, [1, 7, 0, 2, 4, 1, 6, 3]); + assert_eq!(r, e); + + let y = f32x8::new(51., 61., 71., 81., 11., 21., 31., 41.); + let e = f32x8::new(2., 8., 51., 3., 71., 41., 7., 4.); + let r = shuffle!(x, y, [1, 7, 8, 2, 10, 15, 6, 3]); + assert_eq!(r, e); +} + +#[test] +fn shuffle16() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = u8x16::new( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let y = u8x16::new( + 16, 17, 18, 19, + 20, 21, 22, 23, + 24, 25, 26, 27, + 28, 29, 30, 31 + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = u8x16::new( + 0, 1, 2, 3, + 16, 17, 18, 19, + 8, 9, 10, 11, + 20, 21, 22, 23 + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let r = shuffle!( + x, y, + [ + 0, 1, 2, 3, + 16, 17, 18, 19, + 8, 9, 10, 11, + 20, 21, 22, 23 + ] + ); + assert_eq!(r, e); +} + +#[test] +fn shuffle32() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = u8x32::new( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15, + 16, 17, 18, 19, + 20, 21, 22, 23, + 24, 25, 26, 27, + 28, 29, 30, 31 + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let y = u8x32::new( + 32, 33, 34, 35, + 36, 37, 38, 39, + 40, 41, 42, 43, + 44, 45, 46, 47, + 48, 49, 50, 51, + 52, 53, 54, 55, + 56, 57, 58, 59, + 60, 61, 62, 63, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = u8x32::new( + 0, 1, 2, 3, + 32, 33, 34, 35, + 8, 9, 10, 11, + 36, 37, 38, 39, + 8, 9, 10, 11, + 40, 41, 42, 43, + 12, 13, 14, 15, + 44, 45, 46, 47 + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let r = shuffle!( + x, y, + [ + 0, 
1, 2, 3, + 32, 33, 34, 35, + 8, 9, 10, 11, + 36, 37, 38, 39, + 8, 9, 10, 11, + 40, 41, 42, 43, + 12, 13, 14, 15, + 44, 45, 46, 47 + ] + ); + assert_eq!(r, e); +} + +#[test] +fn shuffle64() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = u8x64::new( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15, + 16, 17, 18, 19, + 20, 21, 22, 23, + 24, 25, 26, 27, + 28, 29, 30, 31, + 32, 33, 34, 35, + 36, 37, 38, 39, + 40, 41, 42, 43, + 44, 45, 46, 47, + 48, 49, 50, 51, + 52, 53, 54, 55, + 56, 57, 58, 59, + 60, 61, 62, 63, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let y = u8x64::new( + 64, 65, 66, 67, + 68, 69, 70, 71, + 72, 73, 74, 75, + 76, 77, 78, 79, + 80, 81, 82, 83, + 84, 85, 86, 87, + 88, 89, 90, 91, + 92, 93, 94, 95, + 96, 97, 98, 99, + 100, 101, 102, 103, + 104, 105, 106, 107, + 108, 109, 110, 111, + 112, 113, 114, 115, + 116, 117, 118, 119, + 120, 121, 122, 123, + 124, 125, 126, 127, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = u8x64::new( + 0, 1, 2, 3, + 64, 65, 66, 67, + 8, 9, 10, 11, + 68, 69, 70, 71, + 8, 9, 10, 11, + 72, 73, 74, 75, + 12, 13, 14, 15, + 76, 77, 78, 79, + 16, 17, 18, 19, + 80, 81, 82, 83, + 20, 21, 22, 23, + 84, 85, 86, 87, + 88, 89, 90, 91, + 24, 25, 26, 27, + 92, 93, 94, 95, + 28, 29, 30, 31 + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let r = shuffle!( + x, y, + [ + 0, 1, 2, 3, + 64, 65, 66, 67, + 8, 9, 10, 11, + 68, 69, 70, 71, + 8, 9, 10, 11, + 72, 73, 74, 75, + 12, 13, 14, 15, + 76, 77, 78, 79, + 16, 17, 18, 19, + 80, 81, 82, 83, + 20, 21, 22, 23, + 84, 85, 86, 87, + 88, 89, 90, 91, + 24, 25, 26, 27, + 92, 93, 94, 95, + 28, 29, 30, 31 + ] + ); + assert_eq!(r, e); +} diff --git a/crates/stdsimd/src/lib.rs b/crates/stdsimd/src/lib.rs index abee4fcfd4..b7a6bac4bc 100644 --- a/crates/stdsimd/src/lib.rs +++ b/crates/stdsimd/src/lib.rs @@ -7,7 +7,7 @@ //! //! 
[stdsimd]: https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/ -#![feature(const_fn, integer_atomics, staged_api, stdsimd)] +#![feature(const_fn, integer_atomics, staged_api, stdsimd, use_extern_macros)] #![feature(doc_cfg, allow_internal_unstable)] #![cfg_attr(feature = "cargo-clippy", allow(shadow_reuse))] #![cfg_attr(target_os = "linux", feature(linkage))] @@ -28,6 +28,7 @@ extern crate std; mod stdsimd; pub use stdsimd::*; +pub use coresimd::shuffle; #[allow(unused_imports)] use __do_not_use_this_import::fs; diff --git a/crates/stdsimd/tests/shuffle.rs b/crates/stdsimd/tests/shuffle.rs new file mode 100644 index 0000000000..cd9cd76bdf --- /dev/null +++ b/crates/stdsimd/tests/shuffle.rs @@ -0,0 +1,17 @@ +#![feature(stdsimd)] +#![deny(warnings)] + +#[macro_use] +extern crate stdsimd; + +use stdsimd::simd::*; + +fn main() { + // check that shuffle! does not produce warnings when used in an unsafe block + unsafe { + let a = f32x4::new(0., 1., 2., 3.); + let b = f32x4::new(4., 5., 6., 7.); + let e = f32x4::new(0., 2., 4., 6.); + assert_eq!(e, shuffle!(a, b, [0, 2, 4, 6])); + } +}