diff --git a/.github/workflows/aes.yml b/.github/workflows/aes.yml index 2053daf4..a7e7513e 100644 --- a/.github/workflows/aes.yml +++ b/.github/workflows/aes.yml @@ -219,7 +219,7 @@ jobs: cross test --package aes --target ${{ matrix.target }} cross test --package aes --target ${{ matrix.target }} --features hazmat - # ARMv8 cross-compiled tests for AES intrinsics (nightly-only) + # ARMv8 cross-compiled tests for AES intrinsics armv8: env: RUSTFLAGS: "-Dwarnings --cfg aes_armv8" @@ -227,7 +227,7 @@ jobs: matrix: include: - target: aarch64-unknown-linux-gnu - rust: nightly + rust: 1.61.0 # MSRV for `aes_armv8` runs-on: ubuntu-latest # Cross mounts only current package, i.e. by default it ignores workspace's Cargo.toml defaults: diff --git a/aes/src/armv8.rs b/aes/src/armv8.rs index 4ecc471d..0bd92877 100644 --- a/aes/src/armv8.rs +++ b/aes/src/armv8.rs @@ -14,6 +14,7 @@ pub(crate) mod hazmat; mod encdec; mod expand; +mod intrinsics; #[cfg(test)] mod test_expand; @@ -222,7 +223,7 @@ macro_rules! define_aes_impl { impl From<&$name_enc> for $name_dec { fn from(enc: &$name_enc) -> $name_dec { let mut round_keys = enc.round_keys; - inv_expanded_keys(&mut round_keys); + unsafe { inv_expanded_keys(&mut round_keys) }; Self { round_keys } } } diff --git a/aes/src/armv8/encdec.rs b/aes/src/armv8/encdec.rs index ecf7d5c3..09c59cee 100644 --- a/aes/src/armv8/encdec.rs +++ b/aes/src/armv8/encdec.rs @@ -4,6 +4,12 @@ use crate::{Block, Block8}; use cipher::inout::InOut; use core::arch::aarch64::*; +// Stable "polyfills" for unstable core::arch::aarch64 intrinsics +// TODO(tarcieri): remove when these intrinsics have been stabilized +use super::intrinsics::{ + vaesdq_u8, vaesdq_u8_and_vaesimcq_u8, vaeseq_u8, vaeseq_u8_and_vaesmcq_u8, +}; + /// Perform AES encryption using the given expanded keys. #[target_feature(enable = "aes")] #[target_feature(enable = "neon")] @@ -19,11 +25,8 @@ pub(super) unsafe fn encrypt1( let mut state = vld1q_u8(in_ptr as *const u8); for k in expanded_keys.iter().take(rounds - 1) { - // AES single round encryption - state = vaeseq_u8(state, *k); - - // AES mix columns - state = vaesmcq_u8(state); + // AES single round encryption and mix columns + state = vaeseq_u8_and_vaesmcq_u8(state, *k); } // AES single round encryption @@ -62,11 +65,8 @@ pub(super) unsafe fn encrypt8( for k in expanded_keys.iter().take(rounds - 1) { for i in 0..8 { - // AES single round encryption - state[i] = vaeseq_u8(state[i], *k); - - // AES mix columns - state[i] = vaesmcq_u8(state[i]); + // AES single round encryption and mix columns + state[i] = vaeseq_u8_and_vaesmcq_u8(state[i], *k); } } @@ -95,11 +95,8 @@ pub(super) unsafe fn decrypt1( let mut state = vld1q_u8(in_ptr as *const u8); for k in expanded_keys.iter().take(rounds - 1) { - // AES single round decryption - state = vaesdq_u8(state, *k); - - // AES inverse mix columns - state = vaesimcq_u8(state); + // AES single round decryption and inverse mix columns + state = vaesdq_u8_and_vaesimcq_u8(state, *k); } // AES single round decryption @@ -138,11 +135,8 @@ pub(super) unsafe fn decrypt8( for k in expanded_keys.iter().take(rounds - 1) { for i in 0..8 { - // AES single round decryption - state[i] = vaesdq_u8(state[i], *k); - - // AES inverse mix columns - state[i] = vaesimcq_u8(state[i]); + // AES single round decryption and inverse mix columns + state[i] = vaesdq_u8_and_vaesimcq_u8(state[i], *k); } } diff --git a/aes/src/armv8/expand.rs b/aes/src/armv8/expand.rs index 8e5cf88c..fa1b1b00 100644 --- a/aes/src/armv8/expand.rs +++ b/aes/src/armv8/expand.rs @@ -2,6 +2,10 @@ use core::{arch::aarch64::*, mem, slice}; +// Stable "polyfills" for unstable core::arch::aarch64 intrinsics +// TODO(tarcieri): remove when these intrinsics have been stabilized +use super::intrinsics::{vaeseq_u8, vaesimcq_u8}; + /// There are 4 AES words in a block. const BLOCK_WORDS: usize = 4; @@ -37,9 +41,9 @@ pub(super) fn expand_key(key: &[u8; L]) -> [uint let mut word = ek_words[i - 1]; if i % nk == 0 { - word = sub_word(word).rotate_right(8) ^ ROUND_CONSTS[i / nk - 1]; + word = unsafe { sub_word(word) }.rotate_right(8) ^ ROUND_CONSTS[i / nk - 1]; } else if nk > 6 && i % nk == 4 { - word = sub_word(word) + word = unsafe { sub_word(word) }; } ek_words[i] = ek_words[i - nk] ^ word; @@ -52,26 +56,24 @@ pub(super) fn expand_key(key: &[u8; L]) -> [uint /// /// This is the reverse of the encryption keys, with the Inverse Mix Columns /// operation applied to all but the first and last expanded key. -#[inline] -pub(super) fn inv_expanded_keys(expanded_keys: &mut [uint8x16_t; N]) { +#[target_feature(enable = "aes")] +pub(super) unsafe fn inv_expanded_keys(expanded_keys: &mut [uint8x16_t; N]) { assert!(N == 11 || N == 13 || N == 15); for ek in expanded_keys.iter_mut().take(N - 1).skip(1) { - unsafe { *ek = vaesimcq_u8(*ek) } + *ek = vaesimcq_u8(*ek); } expanded_keys.reverse(); } /// Sub bytes for a single AES word: used for key expansion. -#[inline(always)] -fn sub_word(input: u32) -> u32 { - unsafe { - let input = vreinterpretq_u8_u32(vdupq_n_u32(input)); +#[target_feature(enable = "aes")] +unsafe fn sub_word(input: u32) -> u32 { + let input = vreinterpretq_u8_u32(vdupq_n_u32(input)); - // AES single round encryption (with a "round" key of all zeros) - let sub_input = vaeseq_u8(input, vdupq_n_u8(0)); + // AES single round encryption (with a "round" key of all zeros) + let sub_input = vaeseq_u8(input, vdupq_n_u8(0)); - vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0) - } + vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0) } diff --git a/aes/src/armv8/hazmat.rs b/aes/src/armv8/hazmat.rs index f094243c..3e078cfe 100644 --- a/aes/src/armv8/hazmat.rs +++ b/aes/src/armv8/hazmat.rs @@ -7,6 +7,9 @@ use crate::{Block, Block8}; use core::arch::aarch64::*; +// Stable "polyfills" for unstable core::arch::aarch64 intrinsics +use super::intrinsics::{vaesdq_u8, vaeseq_u8, vaesimcq_u8, vaesmcq_u8}; + /// AES cipher (encrypt) round function. #[allow(clippy::cast_ptr_alignment)] #[target_feature(enable = "aes")] diff --git a/aes/src/armv8/intrinsics.rs b/aes/src/armv8/intrinsics.rs new file mode 100644 index 00000000..752af492 --- /dev/null +++ b/aes/src/armv8/intrinsics.rs @@ -0,0 +1,93 @@ +//! Stable "polyfills" for unstable `core::arch::aarch64` intrinsics which use +//! `asm!` internally to allow use on stable Rust. +// TODO(tarcieri): remove when these intrinsics have been stabilized + +use core::arch::{aarch64::uint8x16_t, asm}; + +/// AES single round encryption. +#[inline] +#[target_feature(enable = "aes")] +pub(super) unsafe fn vaeseq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + asm!( + "AESE {d:v}.16B, {k:v}.16B", + d = inout(vreg) data, + k = in(vreg) key, + options(pure, nomem, nostack, preserves_flags) + ); + data +} + +/// AES single round decryption. +#[inline] +#[target_feature(enable = "aes")] +pub(super) unsafe fn vaesdq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + asm!( + "AESD {d:v}.16B, {k:v}.16B", + d = inout(vreg) data, + k = in(vreg) key, + options(pure, nomem, nostack, preserves_flags) + ); + data +} + +/// AES mix columns. +#[cfg(feature = "hazmat")] +#[inline] +#[target_feature(enable = "aes")] +pub(super) unsafe fn vaesmcq_u8(mut data: uint8x16_t) -> uint8x16_t { + asm!( + "AESMC {d:v}.16B, {d:v}.16B", + d = inout(vreg) data, + options(pure, nomem, nostack, preserves_flags) + ); + data +} + +/// AES inverse mix columns. +#[inline] +#[target_feature(enable = "aes")] +pub(super) unsafe fn vaesimcq_u8(mut data: uint8x16_t) -> uint8x16_t { + asm!( + "AESIMC {d:v}.16B, {d:v}.16B", + d = inout(vreg) data, + options(pure, nomem, nostack, preserves_flags) + ); + data +} + +/// AES single round encryption combined with mix columns. +/// +/// These two instructions are combined into a single assembly block to ensure +/// that instructions fuse properly. +#[inline] +#[target_feature(enable = "aes")] +pub(super) unsafe fn vaeseq_u8_and_vaesmcq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + asm!( + "AESE {d:v}.16B, {k:v}.16B", + "AESMC {d:v}.16B, {d:v}.16B", + d = inout(vreg) data, + k = in(vreg) key, + options(pure, nomem, nostack, preserves_flags) + ); + data +} + +/// AES single round decryption combined with mix columns. +/// +/// These two instructions are combined into a single assembly block to ensure +/// that instructions fuse properly. +#[inline] +#[target_feature(enable = "aes")] +pub(super) unsafe fn vaesdq_u8_and_vaesimcq_u8( + mut data: uint8x16_t, + key: uint8x16_t, +) -> uint8x16_t { + asm!( + "AESD {d:v}.16B, {k:v}.16B", + "AESIMC {d:v}.16B, {d:v}.16B", + d = inout(vreg) data, + k = in(vreg) key, + options(pure, nomem, nostack, preserves_flags) + ); + data +} diff --git a/aes/src/armv8/test_expand.rs b/aes/src/armv8/test_expand.rs index c52bda74..a707a55c 100644 --- a/aes/src/armv8/test_expand.rs +++ b/aes/src/armv8/test_expand.rs @@ -113,7 +113,7 @@ fn aes128_key_expansion() { #[test] fn aes128_key_expansion_inv() { let mut ek = load_expanded_keys(AES128_EXP_KEYS); - inv_expanded_keys(&mut ek); + unsafe { inv_expanded_keys(&mut ek) }; assert_eq!(store_expanded_keys(ek), AES128_EXP_INVKEYS); } diff --git a/aes/src/lib.rs b/aes/src/lib.rs index f43b21c9..2e668d8b 100644 --- a/aes/src/lib.rs +++ b/aes/src/lib.rs @@ -26,11 +26,11 @@ //! backend at the cost of decreased performance (using a modified form of //! the fixslicing technique called "semi-fixslicing"). //! -//! ## ARMv8 intrinsics (nightly-only) +//! ## ARMv8 intrinsics (Rust 1.61+) //! On `aarch64` targets including `aarch64-apple-darwin` (Apple M1) and Linux //! targets such as `aarch64-unknown-linux-gnu` and `aarch64-unknown-linux-musl`, //! support for using AES intrinsics provided by the ARMv8 Cryptography Extensions -//! is available when using the nightly compiler, and can be enabled using the +//! is available when using Rust 1.61 or above, and can be enabled using the //! `aes_armv8` configuration flag. //! //! On Linux and macOS, when the `aes_armv8` flag is enabled support for AES @@ -99,7 +99,7 @@ //! //! You can modify crate using the following configuration flags: //! -//! - `aes_armv8`: enable ARMv8 AES intrinsics (nightly-only). +//! - `aes_armv8`: enable ARMv8 AES intrinsics (Rust 1.61+). //! - `aes_force_soft`: force software implementation. //! - `aes_compact`: reduce code size at the cost of slower performance //! (affects only software backend). @@ -119,7 +119,6 @@ )] #![cfg_attr(docsrs, feature(doc_cfg))] #![warn(missing_docs, rust_2018_idioms)] -#![cfg_attr(all(aes_armv8, target_arch = "aarch64"), feature(stdsimd))] #[cfg(feature = "hazmat")] #[cfg_attr(docsrs, doc(cfg(feature = "hazmat")))]