diff --git a/coresimd/ppsv/api/minimal.rs b/coresimd/ppsv/api/minimal.rs
index 4470bd6c31..fd1ffdb82d 100644
--- a/coresimd/ppsv/api/minimal.rs
+++ b/coresimd/ppsv/api/minimal.rs
@@ -81,6 +81,10 @@ macro_rules! impl_minimal {
simd_insert(self, index as u32, new_value)
}
}
+
+ impl super::api::Simd for $id {
+ type Element = $elem_ty;
+ }
}
}
diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs
index 857264b9f3..931ed5ccf6 100644
--- a/coresimd/ppsv/api/mod.rs
+++ b/coresimd/ppsv/api/mod.rs
@@ -112,9 +112,8 @@ mod partial_eq;
// TODO:
//#[macro_use]
//mod partial_ord;
-// TODO:
-//#[macro_use]
-//mod shuffles;
+#[macro_use]
+pub mod shuffles;
// TODO:
//#[macro_use]
//mod gather_scatter;
@@ -125,9 +124,15 @@ mod scalar_shifts;
#[macro_use]
mod shifts;
-/// Sealed trait used for constraining select implementations.
+/// Sealed trait used to constrain select implementations.
pub trait Lanes {}
+/// Sealed trait used to constrain vector shuffles.
+pub trait Simd {
+ /// Element type of the SIMD vector.
+ type Element;
+}
+
/// Defines a portable packed SIMD floating-point vector type.
macro_rules! simd_f_ty {
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
diff --git a/coresimd/ppsv/api/shuffles.rs b/coresimd/ppsv/api/shuffles.rs
new file mode 100644
index 0000000000..3bd5bd3d76
--- /dev/null
+++ b/coresimd/ppsv/api/shuffles.rs
@@ -0,0 +1,393 @@
+//! Shuffle vectors
+#![allow(unused)]
+
+macro_rules! impl_shuffle {
+ ($_e:expr) => {
+ use super::Simd;
+
+ /// This trait is not public.
+ ///
+ /// It is only used to constrain the return
+ /// type of the vector shuffles.
+ pub trait Shuffle {
+ /// The result type of the shuffle.
+ type Output;
+ }
+
+ // These implementations for the native
+ // types allow constraining the shuffles for
+ // each portable vector type by just implementing
+ // the Simd trait once for them.
+
+ impl Shuffle<[u32; 2]> for i8 {
+ type Output = super::super::i8x2;
+ }
+ impl Shuffle<[u32; 4]> for i8 {
+ type Output = super::super::i8x4;
+ }
+ impl Shuffle<[u32; 8]> for i8 {
+ type Output = super::super::i8x8;
+ }
+ impl Shuffle<[u32; 16]> for i8 {
+ type Output = super::super::i8x16;
+ }
+ impl Shuffle<[u32; 32]> for i8 {
+ type Output = super::super::i8x32;
+ }
+ impl Shuffle<[u32; 64]> for i8 {
+ type Output = super::super::i8x64;
+ }
+ impl Shuffle<[u32; 2]> for u8 {
+ type Output = super::super::u8x2;
+ }
+ impl Shuffle<[u32; 4]> for u8 {
+ type Output = super::super::u8x4;
+ }
+ impl Shuffle<[u32; 8]> for u8 {
+ type Output = super::super::u8x8;
+ }
+ impl Shuffle<[u32; 16]> for u8 {
+ type Output = super::super::u8x16;
+ }
+ impl Shuffle<[u32; 32]> for u8 {
+ type Output = super::super::u8x32;
+ }
+ impl Shuffle<[u32; 64]> for u8 {
+ type Output = super::super::u8x64;
+ }
+
+ impl Shuffle<[u32; 2]> for i16 {
+ type Output = super::super::i16x2;
+ }
+ impl Shuffle<[u32; 4]> for i16 {
+ type Output = super::super::i16x4;
+ }
+ impl Shuffle<[u32; 8]> for i16 {
+ type Output = super::super::i16x8;
+ }
+ impl Shuffle<[u32; 16]> for i16 {
+ type Output = super::super::i16x16;
+ }
+ impl Shuffle<[u32; 32]> for i16 {
+ type Output = super::super::i16x32;
+ }
+ impl Shuffle<[u32; 2]> for u16 {
+ type Output = super::super::u16x2;
+ }
+ impl Shuffle<[u32; 4]> for u16 {
+ type Output = super::super::u16x4;
+ }
+ impl Shuffle<[u32; 8]> for u16 {
+ type Output = super::super::u16x8;
+ }
+ impl Shuffle<[u32; 16]> for u16 {
+ type Output = super::super::u16x16;
+ }
+ impl Shuffle<[u32; 32]> for u16 {
+ type Output = super::super::u16x32;
+ }
+
+ impl Shuffle<[u32; 2]> for i32 {
+ type Output = super::super::i32x2;
+ }
+ impl Shuffle<[u32; 4]> for i32 {
+ type Output = super::super::i32x4;
+ }
+ impl Shuffle<[u32; 8]> for i32 {
+ type Output = super::super::i32x8;
+ }
+ impl Shuffle<[u32; 16]> for i32 {
+ type Output = super::super::i32x16;
+ }
+ impl Shuffle<[u32; 2]> for u32 {
+ type Output = super::super::u32x2;
+ }
+ impl Shuffle<[u32; 4]> for u32 {
+ type Output = super::super::u32x4;
+ }
+ impl Shuffle<[u32; 8]> for u32 {
+ type Output = super::super::u32x8;
+ }
+ impl Shuffle<[u32; 16]> for u32 {
+ type Output = super::super::u32x16;
+ }
+ impl Shuffle<[u32; 2]> for f32 {
+ type Output = super::super::f32x2;
+ }
+ impl Shuffle<[u32; 4]> for f32 {
+ type Output = super::super::f32x4;
+ }
+ impl Shuffle<[u32; 8]> for f32 {
+ type Output = super::super::f32x8;
+ }
+ impl Shuffle<[u32; 16]> for f32 {
+ type Output = super::super::f32x16;
+ }
+
+ impl Shuffle<[u32; 2]> for i64 {
+ type Output = super::super::i64x2;
+ }
+ impl Shuffle<[u32; 4]> for i64 {
+ type Output = super::super::i64x4;
+ }
+ impl Shuffle<[u32; 8]> for i64 {
+ type Output = super::super::i64x8;
+ }
+ impl Shuffle<[u32; 2]> for u64 {
+ type Output = super::super::u64x2;
+ }
+ impl Shuffle<[u32; 4]> for u64 {
+ type Output = super::super::u64x4;
+ }
+ impl Shuffle<[u32; 8]> for u64 {
+ type Output = super::super::u64x8;
+ }
+ impl Shuffle<[u32; 2]> for f64 {
+ type Output = super::super::f64x2;
+ }
+ impl Shuffle<[u32; 4]> for f64 {
+ type Output = super::super::f64x4;
+ }
+ impl Shuffle<[u32; 8]> for f64 {
+ type Output = super::super::f64x8;
+ }
+
+ /// The shuffle intrinsics are reimported here.
+ ///
+ /// At typeck both input vector types are required to be equal and thus
+ /// have the same length, the arrays of indices are required to have the
+ /// correct lengths, and the result type is constrained by the `where`
+ /// clauses below such that only the correct result types can type check.
+ ///
+ /// FIXME: The only way to produce a monomorphization-time error here is
+ /// to pass the intrinsic an element index that is out-of-bounds. Fixing
+ /// this probably requires checking that the indices are in-bounds in
+ /// MIR typeck.
+ mod intrinsics {
+ use super::{Simd, Shuffle};
+ extern "platform-intrinsic" {
+ pub fn simd_shuffle2(a: T, b: T, indices: [u32; 2]) -> U
+ where ::Element: Shuffle<[u32; 2], Output = U>;
+
+ pub fn simd_shuffle4(a: T, b: T, indices: [u32; 4]) -> U
+ where ::Element: Shuffle<[u32; 4], Output = U>;
+
+ pub fn simd_shuffle8(a: T, b: T, indices: [u32; 8]) -> U
+ where ::Element: Shuffle<[u32; 8], Output = U>;
+
+ pub fn simd_shuffle16(a: T, b: T, indices: [u32; 16]) -> U
+ where ::Element: Shuffle<[u32; 16], Output = U>;
+
+ pub fn simd_shuffle32(a: T, b: T, indices: [u32; 32]) -> U
+ where ::Element: Shuffle<[u32; 32], Output = U>;
+
+ pub fn simd_shuffle64(a: T, b: T, indices: [u32; 64]) -> U
+ where ::Element: Shuffle<[u32; 64], Output = U>;
+ }
+ }
+
+ pub use self::intrinsics::simd_shuffle2 as __shuffle_vector2;
+ pub use self::intrinsics::simd_shuffle4 as __shuffle_vector4;
+ pub use self::intrinsics::simd_shuffle8 as __shuffle_vector8;
+ pub use self::intrinsics::simd_shuffle16 as __shuffle_vector16;
+ pub use self::intrinsics::simd_shuffle32 as __shuffle_vector32;
+ pub use self::intrinsics::simd_shuffle64 as __shuffle_vector64;
+ }
+}
+
+vector_impl!([impl_shuffle, 0]);
+
+/// Shuffles vector elements.
+///
+/// This macro returns a new vector that contains a shuffle of the elements in
+/// one or two input vectors:
+///
+/// * `shuffle!(vec, [indices...])`: one-vector version
+/// * `shuffle!(vec0, vec1, [indices...])`: two-vector version
+///
+/// In the two-vector version both `vec0` and `vec1` must have the same type.
+/// The element type of the resulting vector is the element type of the input
+/// vector.
+///
+/// The number of `indices` must be a power-of-two in range `[2, 64]` and at
+/// most two times the number of lanes in the input vector. The length of the
+/// resulting vector equals the number of indices provided.
+///
+/// Given a vector with `N` lanes, the indices in range `[0, N)` refer to the
+/// `N` elements in the vector. In the two-vector version, the indices in range
+/// `[N, 2*N)` refer to elements in the second vector.
+///
+/// # Examples
+///
+/// ```
+/// # #![cfg_attr(not(dox), feature(stdsimd))]
+/// # #![cfg_attr(not(dox), no_std)]
+/// # #[cfg(not(dox))]
+/// # extern crate std as real_std;
+/// # #[cfg(not(dox))]
+/// # #[macro_use]
+/// # extern crate stdsimd as std;
+/// # use std::simd::*;
+/// # fn main() {
+/// // Shuffle allows reordering the elements of a vector:
+/// let x = i32x4::new(1, 2, 3, 4);
+/// let r = shuffle!(x, [2, 1, 3, 0]);
+/// assert_eq!(r, i32x4::new(3, 2, 4, 1));
+///
+/// // The resulting vector can be smaller than the input:
+/// let r = shuffle!(x, [1, 3]);
+/// assert_eq!(r, i32x2::new(2, 4));
+///
+/// // Equal:
+/// let r = shuffle!(x, [1, 3, 2, 0]);
+/// assert_eq!(r, i32x4::new(2, 4, 3, 1));
+///
+/// // Or larger:
+/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]);
+/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3));
+/// // At most 2 * the number of lanes in the input vector.
+///
+/// // It also allows reordering elements of two vectors:
+/// let y = i32x4::new(5, 6, 7, 8);
+/// let r = shuffle!(x, y, [4, 0, 5, 1]);
+/// assert_eq!(r, i32x4::new(5, 1, 6, 2));
+/// // And this can be used to construct larger or smaller
+/// // vectors as well.
+/// # }
+/// ```
+#[macro_export]
+macro_rules! shuffle {
+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {
+ {
+ #[allow(unused_unsafe)]
+ let r = unsafe {
+ $crate::simd::__shuffle_vector2(
+ $vec0, $vec1,
+ [$l0, $l1]
+ )
+ };
+ r
+ }
+ };
+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {
+ {
+ #[allow(unused_unsafe)]
+ let r = unsafe {
+ $crate::simd::__shuffle_vector4(
+ $vec0, $vec1,
+ [$l0, $l1, $l2, $l3]
+ )
+ };
+ r
+ }
+ };
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {
+ {
+ #[allow(unused_unsafe)]
+ let r = unsafe {
+ $crate::simd::__shuffle_vector8(
+ $vec0, $vec1,
+ [$l0, $l1, $l2, $l3,
+ $l4, $l5, $l6, $l7]
+ )
+ };
+ r
+ }
+ };
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {
+ {
+ #[allow(unused_unsafe)]
+ let r = unsafe {
+ $crate::simd::__shuffle_vector16(
+ $vec0, $vec1,
+ [$l0, $l1, $l2, $l3,
+ $l4, $l5, $l6, $l7,
+ $l8, $l9, $l10, $l11,
+ $l12, $l13, $l14, $l15]
+ )
+ };
+ r
+ }
+ };
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr,
+ $l16:expr, $l17:expr, $l18:expr, $l19:expr,
+ $l20:expr, $l21:expr, $l22:expr, $l23:expr,
+ $l24:expr, $l25:expr, $l26:expr, $l27:expr,
+ $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {
+ {
+ #[allow(unused_unsafe)]
+ let r = unsafe {
+ $crate::simd::__shuffle_vector32(
+ $vec0, $vec1,
+ [$l0, $l1, $l2, $l3,
+ $l4, $l5, $l6, $l7,
+ $l8, $l9, $l10, $l11,
+ $l12, $l13, $l14, $l15,
+ $l16, $l17, $l18, $l19,
+ $l20, $l21, $l22, $l23,
+ $l24, $l25, $l26, $l27,
+ $l28, $l29, $l30, $l31]
+ )
+ };
+ r
+ }
+ };
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr,
+ $l16:expr, $l17:expr, $l18:expr, $l19:expr,
+ $l20:expr, $l21:expr, $l22:expr, $l23:expr,
+ $l24:expr, $l25:expr, $l26:expr, $l27:expr,
+ $l28:expr, $l29:expr, $l30:expr, $l31:expr,
+ $l32:expr, $l33:expr, $l34:expr, $l35:expr,
+ $l36:expr, $l37:expr, $l38:expr, $l39:expr,
+ $l40:expr, $l41:expr, $l42:expr, $l43:expr,
+ $l44:expr, $l45:expr, $l46:expr, $l47:expr,
+ $l48:expr, $l49:expr, $l50:expr, $l51:expr,
+ $l52:expr, $l53:expr, $l54:expr, $l55:expr,
+ $l56:expr, $l57:expr, $l58:expr, $l59:expr,
+ $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {
+ {
+ #[allow(unused_unsafe)]
+ let r = unsafe {
+ $crate::simd::__shuffle_vector64(
+ $vec0, $vec1,
+ [$l0, $l1, $l2, $l3,
+ $l4, $l5, $l6, $l7,
+ $l8, $l9, $l10, $l11,
+ $l12, $l13, $l14, $l15,
+ $l16, $l17, $l18, $l19,
+ $l20, $l21, $l22, $l23,
+ $l24, $l25, $l26, $l27,
+ $l28, $l29, $l30, $l31,
+ $l32, $l33, $l34, $l35,
+ $l36, $l37, $l38, $l39,
+ $l40, $l41, $l42, $l43,
+ $l44, $l45, $l46, $l47,
+ $l48, $l49, $l50, $l51,
+ $l52, $l53, $l54, $l55,
+ $l56, $l57, $l58, $l59,
+ $l60, $l61, $l62, $l63]
+ )
+ };
+ r
+ }
+ };
+ ($vec:expr, [$($l:expr),*]) => {
+ match $vec {
+ v => shuffle!(v, v, [$($l),*])
+ }
+ }
+}
diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs
index 4d5c92dad0..c6ef497060 100644
--- a/coresimd/ppsv/mod.rs
+++ b/coresimd/ppsv/mod.rs
@@ -86,3 +86,21 @@ impl FromBits for T {
/// Work arounds code generation issues.
mod codegen;
+
+/// Exposes private shuffle intrinsics
+/// used by the `shuffle!` macro.
+#[allow(unused)]
+macro_rules! expose_shuffles {
+ ($_e:expr) => {
+ pub use self::api::shuffles::{
+ __shuffle_vector2,
+ __shuffle_vector4,
+ __shuffle_vector8,
+ __shuffle_vector16,
+ __shuffle_vector32,
+ __shuffle_vector64,
+ };
+ }
+}
+
+vector_impl!([expose_shuffles, 0]);
diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs
index c83c2d4b35..2ba3944bd4 100644
--- a/coresimd/simd_llvm.rs
+++ b/coresimd/simd_llvm.rs
@@ -15,6 +15,8 @@ extern "platform-intrinsic" {
pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U;
pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U;
pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U;
+ pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U;
+ pub fn simd_shuffle128(x: T, y: T, idx: [u32; 128]) -> U;
pub fn simd_insert(x: T, idx: u32, val: U) -> T;
pub fn simd_extract(x: T, idx: u32) -> U;
diff --git a/crates/coresimd/tests/shuffles.rs b/crates/coresimd/tests/shuffles.rs
new file mode 100644
index 0000000000..a5d7ba95b1
--- /dev/null
+++ b/crates/coresimd/tests/shuffles.rs
@@ -0,0 +1,242 @@
+#![feature(stdsimd)]
+
+#[macro_use]
+extern crate coresimd;
+
+use coresimd::simd::*;
+
+#[test]
+fn shuffle2() {
+ let x = u8x2::new(3, 42);
+ let e = u8x2::new(42, 3);
+ let r = shuffle!(x, [1, 0]);
+ assert_eq!(r, e);
+
+ let y = u8x2::new(7, 12);
+ let e = u8x2::new(42, 12);
+ let r = shuffle!(x, y, [1, 3]);
+ assert_eq!(r, e);
+
+ let x = i16x4::new(1, 2, 3, 4);
+ let e = i16x2::new(2, 4);
+ let r = shuffle!(x, [1, 3]);
+ assert_eq!(r, e);
+
+ let y = i16x4::new(5, 6, 7, 8);
+ let e = i16x2::new(2, 7);
+ let r = shuffle!(x, y, [1, 6]);
+ assert_eq!(r, e);
+}
+
+#[test]
+fn shuffle4() {
+ let x = u8x2::new(3, 42);
+ let e = u8x4::new(42, 3, 42, 42);
+ let r = shuffle!(x, [1, 0, 1, 1]);
+ assert_eq!(r, e);
+
+ let x = u32x4::new(1, 2, 3, 4);
+ let e = u32x4::new(2, 4, 1, 3);
+ let r = shuffle!(x, [1, 3, 0, 2]);
+ assert_eq!(r, e);
+
+ let y = u32x4::new(5, 6, 7, 8);
+ let e = u32x4::new(3, 2, 6, 1);
+ let r = shuffle!(x, y, [2, 1, 5, 0]);
+ assert_eq!(r, e);
+
+ let x = i32x8::new(1, 2, 3, 4, 7, 3, 2, 1);
+ let e = i32x4::new(2, 7, 3, 3);
+ let r = shuffle!(x, [1, 4, 5, 5]);
+ assert_eq!(r, e);
+
+ let y = i32x8::new(5, 6, 7, 8, 1, 5, 2, 3);
+ let e = i32x4::new(3, 5, 7, 3);
+ let r = shuffle!(x, y, [15, 13, 4, 5]);
+ assert_eq!(r, e);
+}
+
+#[test]
+fn shuffle8() {
+ let x = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+ let e = f32x8::new(2., 8., 1., 3., 5., 2., 7., 4.);
+ let r = shuffle!(x, [1, 7, 0, 2, 4, 1, 6, 3]);
+ assert_eq!(r, e);
+
+ let y = f32x8::new(51., 61., 71., 81., 11., 21., 31., 41.);
+ let e = f32x8::new(2., 8., 51., 3., 71., 41., 7., 4.);
+ let r = shuffle!(x, y, [1, 7, 8, 2, 10, 15, 6, 3]);
+ assert_eq!(r, e);
+}
+
+#[test]
+fn shuffle16() {
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let x = u8x16::new(
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, 14, 15,
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let y = u8x16::new(
+ 16, 17, 18, 19,
+ 20, 21, 22, 23,
+ 24, 25, 26, 27,
+ 28, 29, 30, 31
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let e = u8x16::new(
+ 0, 1, 2, 3,
+ 16, 17, 18, 19,
+ 8, 9, 10, 11,
+ 20, 21, 22, 23
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let r = shuffle!(
+ x, y,
+ [
+ 0, 1, 2, 3,
+ 16, 17, 18, 19,
+ 8, 9, 10, 11,
+ 20, 21, 22, 23
+ ]
+ );
+ assert_eq!(r, e);
+}
+
+#[test]
+fn shuffle32() {
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let x = u8x32::new(
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, 14, 15,
+ 16, 17, 18, 19,
+ 20, 21, 22, 23,
+ 24, 25, 26, 27,
+ 28, 29, 30, 31
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let y = u8x32::new(
+ 32, 33, 34, 35,
+ 36, 37, 38, 39,
+ 40, 41, 42, 43,
+ 44, 45, 46, 47,
+ 48, 49, 50, 51,
+ 52, 53, 54, 55,
+ 56, 57, 58, 59,
+ 60, 61, 62, 63,
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let e = u8x32::new(
+ 0, 1, 2, 3,
+ 32, 33, 34, 35,
+ 8, 9, 10, 11,
+ 36, 37, 38, 39,
+ 8, 9, 10, 11,
+ 40, 41, 42, 43,
+ 12, 13, 14, 15,
+ 44, 45, 46, 47
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let r = shuffle!(
+ x, y,
+ [
+ 0, 1, 2, 3,
+ 32, 33, 34, 35,
+ 8, 9, 10, 11,
+ 36, 37, 38, 39,
+ 8, 9, 10, 11,
+ 40, 41, 42, 43,
+ 12, 13, 14, 15,
+ 44, 45, 46, 47
+ ]
+ );
+ assert_eq!(r, e);
+}
+
+#[test]
+fn shuffle64() {
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let x = u8x64::new(
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, 14, 15,
+ 16, 17, 18, 19,
+ 20, 21, 22, 23,
+ 24, 25, 26, 27,
+ 28, 29, 30, 31,
+ 32, 33, 34, 35,
+ 36, 37, 38, 39,
+ 40, 41, 42, 43,
+ 44, 45, 46, 47,
+ 48, 49, 50, 51,
+ 52, 53, 54, 55,
+ 56, 57, 58, 59,
+ 60, 61, 62, 63,
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let y = u8x64::new(
+ 64, 65, 66, 67,
+ 68, 69, 70, 71,
+ 72, 73, 74, 75,
+ 76, 77, 78, 79,
+ 80, 81, 82, 83,
+ 84, 85, 86, 87,
+ 88, 89, 90, 91,
+ 92, 93, 94, 95,
+ 96, 97, 98, 99,
+ 100, 101, 102, 103,
+ 104, 105, 106, 107,
+ 108, 109, 110, 111,
+ 112, 113, 114, 115,
+ 116, 117, 118, 119,
+ 120, 121, 122, 123,
+ 124, 125, 126, 127,
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let e = u8x64::new(
+ 0, 1, 2, 3,
+ 64, 65, 66, 67,
+ 8, 9, 10, 11,
+ 68, 69, 70, 71,
+ 8, 9, 10, 11,
+ 72, 73, 74, 75,
+ 12, 13, 14, 15,
+ 76, 77, 78, 79,
+ 16, 17, 18, 19,
+ 80, 81, 82, 83,
+ 20, 21, 22, 23,
+ 84, 85, 86, 87,
+ 88, 89, 90, 91,
+ 24, 25, 26, 27,
+ 92, 93, 94, 95,
+ 28, 29, 30, 31
+ );
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ let r = shuffle!(
+ x, y,
+ [
+ 0, 1, 2, 3,
+ 64, 65, 66, 67,
+ 8, 9, 10, 11,
+ 68, 69, 70, 71,
+ 8, 9, 10, 11,
+ 72, 73, 74, 75,
+ 12, 13, 14, 15,
+ 76, 77, 78, 79,
+ 16, 17, 18, 19,
+ 80, 81, 82, 83,
+ 20, 21, 22, 23,
+ 84, 85, 86, 87,
+ 88, 89, 90, 91,
+ 24, 25, 26, 27,
+ 92, 93, 94, 95,
+ 28, 29, 30, 31
+ ]
+ );
+ assert_eq!(r, e);
+}
diff --git a/crates/stdsimd/src/lib.rs b/crates/stdsimd/src/lib.rs
index abee4fcfd4..b7a6bac4bc 100644
--- a/crates/stdsimd/src/lib.rs
+++ b/crates/stdsimd/src/lib.rs
@@ -7,7 +7,7 @@
//!
//! [stdsimd]: https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/
-#![feature(const_fn, integer_atomics, staged_api, stdsimd)]
+#![feature(const_fn, integer_atomics, staged_api, stdsimd, use_extern_macros)]
#![feature(doc_cfg, allow_internal_unstable)]
#![cfg_attr(feature = "cargo-clippy", allow(shadow_reuse))]
#![cfg_attr(target_os = "linux", feature(linkage))]
@@ -28,6 +28,7 @@ extern crate std;
mod stdsimd;
pub use stdsimd::*;
+pub use coresimd::shuffle;
#[allow(unused_imports)]
use __do_not_use_this_import::fs;
diff --git a/crates/stdsimd/tests/shuffle.rs b/crates/stdsimd/tests/shuffle.rs
new file mode 100644
index 0000000000..cd9cd76bdf
--- /dev/null
+++ b/crates/stdsimd/tests/shuffle.rs
@@ -0,0 +1,17 @@
+#![feature(stdsimd)]
+#![deny(warnings)]
+
+#[macro_use]
+extern crate stdsimd;
+
+use stdsimd::simd::*;
+
+fn main() {
+ // check that shuffle! does not produce warnings when used in an unsafe block
+ unsafe {
+ let a = f32x4::new(0., 1., 2., 3.);
+ let b = f32x4::new(4., 5., 6., 7.);
+ let e = f32x4::new(0., 2., 4., 6.);
+ assert_eq!(e, shuffle!(a, b, [0, 2, 4, 6]));
+ }
+}