diff --git a/.github/workflows/aes.yml b/.github/workflows/aes.yml index c201389e2..cc8907c48 100644 --- a/.github/workflows/aes.yml +++ b/.github/workflows/aes.yml @@ -36,12 +36,12 @@ jobs: target: ${{ matrix.target }} profile: minimal override: true - - run: cargo check --all-features + - run: cargo check --features compact,ctr,force-soft - run: cargo build --release --target ${{ matrix.target }} - run: cargo build --release --target ${{ matrix.target }} --features compact - run: cargo build --release --target ${{ matrix.target }} --features ctr - run: cargo build --release --target ${{ matrix.target }} --features force-soft - - run: cargo build --release --target ${{ matrix.target }} --all-features + - run: cargo build --release --target ${{ matrix.target }} --features compact,ctr,force-soft # Tests for the AES-NI backend aesni: @@ -79,7 +79,6 @@ jobs: - run: cargo test --release --target ${{ matrix.target }} --features compact - run: cargo test --release --target ${{ matrix.target }} --features ctr - run: cargo test --release --target ${{ matrix.target }} --features force-soft - - run: cargo test --release --target ${{ matrix.target }} --all-features # Tests for CPU feature autodetection with fallback to portable software implementation autodetect: @@ -144,7 +143,7 @@ jobs: - run: cargo test --release --target ${{ matrix.target }} --features force-soft - run: cargo test --release --target ${{ matrix.target }} --features force-soft,compact - run: cargo test --release --target ${{ matrix.target }} --features force-soft,ctr - - run: cargo test --release --target ${{ matrix.target }} --all-features + - run: cargo build --release --target ${{ matrix.target }} --features compact,ctr,force-soft # Cross-compiled tests cross: @@ -162,7 +161,6 @@ jobs: rust: 1.49.0 # MSRV - target: powerpc-unknown-linux-gnu rust: stable - runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 @@ -178,4 +176,40 @@ jobs: - run: cross test --release --target ${{ matrix.target }} --features compact - run: cross test --release --target ${{ matrix.target }} --features ctr - run: cross test --release --target ${{ matrix.target }} --features force-soft + - run: cross test --release --target ${{ matrix.target }} --features compact,ctr,force-soft + + # ARMv8 cross-compiled tests for AES intrinsics (nightly-only) + armv8: + strategy: + matrix: + include: + - target: aarch64-unknown-linux-gnu + rust: nightly + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - run: ${{ matrix.deps }} + - uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + profile: minimal + override: true + - run: cargo install cross + - run: cross test --release --target ${{ matrix.target }} + - run: cross test --release --target ${{ matrix.target }} --features armv8,compact + - run: cross test --release --target ${{ matrix.target }} --features armv8,ctr + - run: cross test --release --target ${{ matrix.target }} --features armv8,force-soft - run: cross test --release --target ${{ matrix.target }} --all-features + + clippy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions-rs/toolchain@v1 + with: + toolchain: 1.49.0 # MSRV + components: clippy + override: true + profile: minimal + - run: cargo clippy --features compact,ctr,force-soft -- -D warnings diff --git a/.github/workflows/workspace.yml b/.github/workflows/workspace.yml index e9cb4215d..79bb4c61f 100644 --- a/.github/workflows/workspace.yml +++ b/.github/workflows/workspace.yml @@ -20,7 +20,7 @@ jobs: components: clippy override: true profile: minimal - - run: cargo clippy --all --all-features -- -D warnings + - run: cargo clippy --all --exclude aes --all-features -- -D warnings rustfmt: runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index a8ea09580..2f7896d1d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1 @@ -/target/ -aes/aesni/target/ -aes/aes/target/ +target/ diff --git a/Cargo.lock b/Cargo.lock index 78212d69a..9750c05f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,6 +10,7 @@ dependencies = [ "cipher", "cpufeatures", "ctr", + "hex-literal", "opaque-debug", ] @@ -78,9 +79,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "281f563b2c3a0e535ab12d81d3c5859045795256ad269afa7c19542585b68f93" +checksum = "ed00c67cb5d0a7d64a44f6ad2668db7e7530311dd53ea79bcd4fb022c64911c8" dependencies = [ "libc", ] diff --git a/aes/Cargo.lock b/aes/Cargo.lock new file mode 100644 index 000000000..e40532265 --- /dev/null +++ b/aes/Cargo.lock @@ -0,0 +1,114 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aes" +version = "0.7.1" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", + "ctr", + "hex-literal", + "opaque-debug", +] + +[[package]] +name = "blobby" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc52553543ecb104069b0ff9e0fcc5c739ad16202935528a112d974e8f1a4ee8" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cipher" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +dependencies = [ + "blobby", + "generic-array", +] + +[[package]] +name = "cpufeatures" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed00c67cb5d0a7d64a44f6ad2668db7e7530311dd53ea79bcd4fb022c64911c8" +dependencies = [ + "libc", +] + +[[package]] +name = "ctr" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a232f92a03f37dd7d7dd2adc67166c77e9cd88de5b019b9a9eecfaeaf7bfd481" +dependencies = [ + "cipher", +] + +[[package]] +name = "generic-array" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "hex-literal" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "961de220ec9a91af2e1e5bd80d02109155695e516771762381ef8581317066e0" +dependencies = [ + "hex-literal-impl", + "proc-macro-hack", +] + +[[package]] +name = "hex-literal-impl" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "853f769599eb31de176303197b7ba4973299c38c7a7604a6bc88c3eef05b9b46" +dependencies = [ + "proc-macro-hack", +] + +[[package]] +name = "libc" +version = "0.2.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "typenum" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" diff --git a/aes/Cargo.toml b/aes/Cargo.toml index be1d9da37..2412aed51 100644 --- a/aes/Cargo.toml +++ b/aes/Cargo.toml @@ -22,12 +22,14 @@ opaque-debug = "0.3" [dev-dependencies] cipher = { version = "0.3", features = ["dev"] } +hex-literal = "0.2" -[target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dependencies] -cpufeatures = "0.1" +[target.'cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))'.dependencies] +cpufeatures = "0.1.4" [features] -compact = [] # Reduce code size at the cost of slower performance +armv8 = [] # Enable ARMv8 AES intrinsics (nightly-only) +compact = [] # Reduce code size at the cost of slower performance force-soft = [] # Disable support for AES hardware intrinsics [package.metadata.docs.rs] diff --git a/aes/src/armv8.rs b/aes/src/armv8.rs new file mode 100644 index 000000000..9567d7340 --- /dev/null +++ b/aes/src/armv8.rs @@ -0,0 +1,577 @@ +//! AES block cipher implementation using the ARMv8 Cryptography Extensions. +//! +//! Based on this C intrinsics implementation: +//! +//! +//! Original C written and placed in public domain by Jeffrey Walton. +//! Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and +//! Barry O'Rourke for the mbedTLS project. + +#![allow(clippy::needless_range_loop)] + +use crate::{Block, ParBlocks}; +use cipher::{ + consts::{U16, U24, U32, U8}, + generic_array::GenericArray, + BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher, +}; +use core::{arch::aarch64::*, convert::TryInto, mem, slice}; + +/// There are 4 AES words in a block. +const BLOCK_WORDS: usize = 4; + +/// The AES (nee Rijndael) notion of a word is always 32-bits, or 4-bytes. +const WORD_SIZE: usize = 4; + +/// AES round constants. +const ROUND_CONSTS: [u32; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36]; + +macro_rules! define_aes_impl { + ( + $name:ident, + $name_enc:ident, + $name_dec:ident, + $key_size:ty, + $rounds:tt, + $doc:expr + ) => { + #[doc=$doc] + #[doc = "block cipher"] + #[derive(Clone)] + pub struct $name { + encrypt: $name_enc, + decrypt: $name_dec, + } + + impl NewBlockCipher for $name { + type KeySize = $key_size; + + #[inline] + fn new(key: &GenericArray) -> Self { + let encrypt = $name_enc::new(key); + let decrypt = $name_dec::from(&encrypt); + Self { encrypt, decrypt } + } + } + + impl BlockCipher for $name { + type BlockSize = U16; + type ParBlocks = U8; + } + + impl BlockEncrypt for $name { + #[inline] + fn encrypt_block(&self, block: &mut Block) { + self.encrypt.encrypt_block(block) + } + + #[inline] + fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) { + self.encrypt.encrypt_par_blocks(blocks) + } + } + + impl BlockDecrypt for $name { + #[inline] + fn decrypt_block(&self, block: &mut Block) { + self.decrypt.decrypt_block(block) + } + + #[inline] + fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) { + self.decrypt.decrypt_par_blocks(blocks) + } + } + + #[doc=$doc] + #[doc = "block cipher (encrypt-only)"] + #[derive(Clone)] + pub struct $name_enc { + round_keys: [uint8x16_t; $rounds], + } + + impl NewBlockCipher for $name_enc { + type KeySize = $key_size; + + fn new(key: &GenericArray) -> Self { + Self { + round_keys: expand_key(key.as_ref()), + } + } + } + + impl BlockCipher for $name_enc { + type BlockSize = U16; + type ParBlocks = U8; + } + + impl BlockEncrypt for $name_enc { + fn encrypt_block(&self, block: &mut Block) { + unsafe { encrypt(&self.round_keys, block) } + } + + fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) { + unsafe { encrypt8(&self.round_keys, blocks) } + } + } + + #[doc=$doc] + #[doc = "block cipher (decrypt-only)"] + #[derive(Clone)] + pub struct $name_dec { + round_keys: [uint8x16_t; $rounds], + } + + impl NewBlockCipher for $name_dec { + type KeySize = $key_size; + + fn new(key: &GenericArray) -> Self { + $name_enc::new(key).into() + } + } + + impl From<$name_enc> for $name_dec { + fn from(enc: $name_enc) -> $name_dec { + Self::from(&enc) + } + } + + impl From<&$name_enc> for $name_dec { + fn from(enc: &$name_enc) -> $name_dec { + let mut round_keys = enc.round_keys; + inverse_expanded_keys(&mut round_keys); + Self { round_keys } + } + } + + impl BlockCipher for $name_dec { + type BlockSize = U16; + type ParBlocks = U8; + } + + impl BlockDecrypt for $name_dec { + fn decrypt_block(&self, block: &mut Block) { + unsafe { decrypt(&self.round_keys, block) } + } + + fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) { + unsafe { decrypt8(&self.round_keys, blocks) } + } + } + + opaque_debug::implement!($name); + opaque_debug::implement!($name_enc); + opaque_debug::implement!($name_dec); + }; +} + +define_aes_impl!(Aes128, Aes128Enc, Aes128Dec, U16, 11, "AES-128"); +define_aes_impl!(Aes192, Aes192Enc, Aes192Dec, U24, 13, "AES-192"); +define_aes_impl!(Aes256, Aes256Enc, Aes256Dec, U32, 15, "AES-256"); + +/// AES key expansion +// TODO(tarcieri): big endian support? +#[inline] +fn expand_key(key: &[u8; L]) -> [uint8x16_t; N] { + assert!((L == 16 && N == 11) || (L == 24 && N == 13) || (L == 32 && N == 15)); + + let mut expanded_keys: [uint8x16_t; N] = unsafe { mem::zeroed() }; + + // TODO(tarcieri): construct expanded keys using `vreinterpretq_u8_u32` + let ek_words = unsafe { + slice::from_raw_parts_mut(expanded_keys.as_mut_ptr() as *mut u32, N * BLOCK_WORDS) + }; + + for (i, chunk) in key.chunks_exact(WORD_SIZE).enumerate() { + ek_words[i] = u32::from_ne_bytes(chunk.try_into().unwrap()); + } + + // From "The Rijndael Block Cipher" Section 4.1: + // > The number of columns of the Cipher Key is denoted by `Nk` and is + // > equal to the key length divided by 32 [bits]. + let nk = L / WORD_SIZE; + + for i in nk..(N * BLOCK_WORDS) { + let mut word = ek_words[i - 1]; + + if i % nk == 0 { + word = sub_word(word).rotate_right(8) ^ ROUND_CONSTS[i / nk - 1]; + } else if nk > 6 && i % nk == 4 { + word = sub_word(word) + } + + ek_words[i] = ek_words[i - nk] ^ word; + } + + expanded_keys +} + +/// Compute inverse expanded keys (for decryption). +/// +/// This is the reverse of the encryption keys, with the Inverse Mix Columns +/// operation applied to all but the first and last expanded key. +#[inline] +fn inverse_expanded_keys(expanded_keys: &mut [uint8x16_t; N]) { + assert!(N == 11 || N == 13 || N == 15); + + for ek in expanded_keys.iter_mut().take(N - 1).skip(1) { + unsafe { *ek = vaesimcq_u8(*ek) } + } + + expanded_keys.reverse(); +} + +/// Perform AES encryption using the given expanded keys. +#[target_feature(enable = "crypto")] +#[target_feature(enable = "neon")] +unsafe fn encrypt(expanded_keys: &[uint8x16_t; N], block: &mut Block) { + let rounds = N - 1; + assert!(rounds == 10 || rounds == 12 || rounds == 14); + + let mut state = vld1q_u8(block.as_ptr()); + + for k in expanded_keys.iter().take(rounds - 1) { + // AES single round encryption + state = vaeseq_u8(state, *k); + + // AES mix columns + state = vaesmcq_u8(state); + } + + // AES single round encryption + state = vaeseq_u8(state, expanded_keys[rounds - 1]); + + // Final add (bitwise XOR) + state = veorq_u8(state, expanded_keys[rounds]); + + vst1q_u8(block.as_mut_ptr(), state); +} + +/// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys. +#[target_feature(enable = "crypto")] +#[target_feature(enable = "neon")] +unsafe fn encrypt8(expanded_keys: &[uint8x16_t; N], blocks: &mut ParBlocks) { + let rounds = N - 1; + assert!(rounds == 10 || rounds == 12 || rounds == 14); + + let mut state = [ + vld1q_u8(blocks[0].as_ptr()), + vld1q_u8(blocks[1].as_ptr()), + vld1q_u8(blocks[2].as_ptr()), + vld1q_u8(blocks[3].as_ptr()), + vld1q_u8(blocks[4].as_ptr()), + vld1q_u8(blocks[5].as_ptr()), + vld1q_u8(blocks[6].as_ptr()), + vld1q_u8(blocks[7].as_ptr()), + ]; + + for k in expanded_keys.iter().take(rounds - 1) { + for i in 0..8 { + // AES single round encryption + state[i] = vaeseq_u8(state[i], *k); + + // AES mix columns + state[i] = vaesmcq_u8(state[i]); + } + } + + for i in 0..8 { + // AES single round encryption + state[i] = vaeseq_u8(state[i], expanded_keys[rounds - 1]); + + // Final add (bitwise XOR) + state[i] = veorq_u8(state[i], expanded_keys[rounds]); + + vst1q_u8(blocks[i].as_mut_ptr(), state[i]); + } +} + +/// Perform AES decryption using the given expanded keys. +#[target_feature(enable = "crypto")] +#[target_feature(enable = "neon")] +unsafe fn decrypt(expanded_keys: &[uint8x16_t; N], block: &mut Block) { + let rounds = N - 1; + assert!(rounds == 10 || rounds == 12 || rounds == 14); + + let mut state = vld1q_u8(block.as_ptr()); + + for k in expanded_keys.iter().take(rounds - 1) { + // AES single round decryption + state = vaesdq_u8(state, *k); + + // AES inverse mix columns + state = vaesimcq_u8(state); + } + + // AES single round decryption + state = vaesdq_u8(state, expanded_keys[rounds - 1]); + + // Final add (bitwise XOR) + state = veorq_u8(state, expanded_keys[rounds]); + + vst1q_u8(block.as_mut_ptr(), state); +} + +/// Perform parallel AES decryption 8-blocks-at-a-time using the given expanded keys. +#[target_feature(enable = "crypto")] +#[target_feature(enable = "neon")] +unsafe fn decrypt8(expanded_keys: &[uint8x16_t; N], blocks: &mut ParBlocks) { + let rounds = N - 1; + assert!(rounds == 10 || rounds == 12 || rounds == 14); + + let mut state = [ + vld1q_u8(blocks[0].as_ptr()), + vld1q_u8(blocks[1].as_ptr()), + vld1q_u8(blocks[2].as_ptr()), + vld1q_u8(blocks[3].as_ptr()), + vld1q_u8(blocks[4].as_ptr()), + vld1q_u8(blocks[5].as_ptr()), + vld1q_u8(blocks[6].as_ptr()), + vld1q_u8(blocks[7].as_ptr()), + ]; + + for k in expanded_keys.iter().take(rounds - 1) { + for i in 0..8 { + // AES single round decryption + state[i] = vaesdq_u8(state[i], *k); + + // AES inverse mix columns + state[i] = vaesimcq_u8(state[i]); + } + } + + for i in 0..8 { + // AES single round decryption + state[i] = vaesdq_u8(state[i], expanded_keys[rounds - 1]); + + // Final add (bitwise XOR) + state[i] = veorq_u8(state[i], expanded_keys[rounds]); + + vst1q_u8(blocks[i].as_mut_ptr(), state[i]); + } +} + +/// Sub bytes for a single AES word: used for key expansion. +#[inline(always)] +fn sub_word(input: u32) -> u32 { + unsafe { + let input = vreinterpretq_u8_u32(vdupq_n_u32(input)); + + // AES single round encryption (with a "round" key of all zeros) + let sub_input = vaeseq_u8(input, vdupq_n_u8(0)); + + vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0) + } +} + +// TODO(tarcieri): use `stdarch` intrinsic for this when it becomes available +#[inline(always)] +unsafe fn vst1q_u8(dst: *mut u8, src: uint8x16_t) { + dst.copy_from_nonoverlapping(&src as *const _ as *const u8, 16); +} + +#[cfg(test)] +mod tests { + use super::{ + decrypt, decrypt8, encrypt, encrypt8, expand_key, inverse_expanded_keys, vst1q_u8, + ParBlocks, + }; + use core::{arch::aarch64::*, convert::TryInto}; + use hex_literal::hex; + + /// FIPS 197, Appendix A.1: AES-128 Cipher Key + /// user input, unaligned buffer + const AES128_KEY: [u8; 16] = hex!("2b7e151628aed2a6abf7158809cf4f3c"); + + /// FIPS 197 Appendix A.1: Expansion of a 128-bit Cipher Key + /// library controlled, aligned buffer + const AES128_EXP_KEYS: [[u8; 16]; 11] = [ + AES128_KEY, + hex!("a0fafe1788542cb123a339392a6c7605"), + hex!("f2c295f27a96b9435935807a7359f67f"), + hex!("3d80477d4716fe3e1e237e446d7a883b"), + hex!("ef44a541a8525b7fb671253bdb0bad00"), + hex!("d4d1c6f87c839d87caf2b8bc11f915bc"), + hex!("6d88a37a110b3efddbf98641ca0093fd"), + hex!("4e54f70e5f5fc9f384a64fb24ea6dc4f"), + hex!("ead27321b58dbad2312bf5607f8d292f"), + hex!("ac7766f319fadc2128d12941575c006e"), + hex!("d014f9a8c9ee2589e13f0cc8b6630ca6"), + ]; + + /// Inverse expanded keys for [`AES128_EXPANDED_KEYS`] + const AES128_EXP_INVKEYS: [[u8; 16]; 11] = [ + hex!("d014f9a8c9ee2589e13f0cc8b6630ca6"), + hex!("0c7b5a631319eafeb0398890664cfbb4"), + hex!("df7d925a1f62b09da320626ed6757324"), + hex!("12c07647c01f22c7bc42d2f37555114a"), + hex!("6efcd876d2df54807c5df034c917c3b9"), + hex!("6ea30afcbc238cf6ae82a4b4b54a338d"), + hex!("90884413d280860a12a128421bc89739"), + hex!("7c1f13f74208c219c021ae480969bf7b"), + hex!("cc7505eb3e17d1ee82296c51c9481133"), + hex!("2b3708a7f262d405bc3ebdbf4b617d62"), + AES128_KEY, + ]; + + /// FIPS 197, Appendix A.2: AES-192 Cipher Key + /// user input, unaligned buffer + const AES192_KEY: [u8; 24] = hex!("8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b"); + + /// FIPS 197 Appendix A.2: Expansion of a 192-bit Cipher Key + /// library controlled, aligned buffer + const AES192_EXP_KEYS: [[u8; 16]; 13] = [ + hex!("8e73b0f7da0e6452c810f32b809079e5"), + hex!("62f8ead2522c6b7bfe0c91f72402f5a5"), + hex!("ec12068e6c827f6b0e7a95b95c56fec2"), + hex!("4db7b4bd69b5411885a74796e92538fd"), + hex!("e75fad44bb095386485af05721efb14f"), + hex!("a448f6d94d6dce24aa326360113b30e6"), + hex!("a25e7ed583b1cf9a27f939436a94f767"), + hex!("c0a69407d19da4e1ec1786eb6fa64971"), + hex!("485f703222cb8755e26d135233f0b7b3"), + hex!("40beeb282f18a2596747d26b458c553e"), + hex!("a7e1466c9411f1df821f750aad07d753"), + hex!("ca4005388fcc5006282d166abc3ce7b5"), + hex!("e98ba06f448c773c8ecc720401002202"), + ]; + + /// FIPS 197, Appendix A.3: AES-256 Cipher Key + /// user input, unaligned buffer + const AES256_KEY: [u8; 32] = + hex!("603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4"); + + /// FIPS 197 Appendix A.3: Expansion of a 256-bit Cipher Key + /// library controlled, aligned buffer + const AES256_EXP_KEYS: [[u8; 16]; 15] = [ + hex!("603deb1015ca71be2b73aef0857d7781"), + hex!("1f352c073b6108d72d9810a30914dff4"), + hex!("9ba354118e6925afa51a8b5f2067fcde"), + hex!("a8b09c1a93d194cdbe49846eb75d5b9a"), + hex!("d59aecb85bf3c917fee94248de8ebe96"), + hex!("b5a9328a2678a647983122292f6c79b3"), + hex!("812c81addadf48ba24360af2fab8b464"), + hex!("98c5bfc9bebd198e268c3ba709e04214"), + hex!("68007bacb2df331696e939e46c518d80"), + hex!("c814e20476a9fb8a5025c02d59c58239"), + hex!("de1369676ccc5a71fa2563959674ee15"), + hex!("5886ca5d2e2f31d77e0af1fa27cf73c3"), + hex!("749c47ab18501ddae2757e4f7401905a"), + hex!("cafaaae3e4d59b349adf6acebd10190d"), + hex!("fe4890d1e6188d0b046df344706c631e"), + ]; + + /// FIPS 197, Appendix B input + /// user input, unaligned buffer + const INPUT: [u8; 16] = hex!("3243f6a8885a308d313198a2e0370734"); + + /// FIPS 197, Appendix B output + const EXPECTED: [u8; 16] = hex!("3925841d02dc09fbdc118597196a0b32"); + + fn load_expanded_keys(input: [[u8; 16]; N]) -> [uint8x16_t; N] { + let mut output = [unsafe { vdupq_n_u8(0) }; N]; + + for (src, dst) in input.iter().zip(output.iter_mut()) { + *dst = unsafe { vld1q_u8(src.as_ptr()) } + } + + output + } + + fn store_expanded_keys(input: [uint8x16_t; N]) -> [[u8; 16]; N] { + let mut output = [[0u8; 16]; N]; + + for (src, dst) in input.iter().zip(output.iter_mut()) { + unsafe { vst1q_u8(dst.as_mut_ptr(), *src) } + } + + output + } + + #[test] + fn aes128_key_expansion() { + let ek = expand_key(&AES128_KEY); + assert_eq!(store_expanded_keys(ek), AES128_EXP_KEYS); + } + + #[test] + fn aes128_key_expansion_inv() { + let mut ek = load_expanded_keys(AES128_EXP_KEYS); + inverse_expanded_keys(&mut ek); + assert_eq!(store_expanded_keys(ek), AES128_EXP_INVKEYS); + } + + #[test] + fn aes192_key_expansion() { + let ek = expand_key(&AES192_KEY); + assert_eq!(store_expanded_keys(ek), AES192_EXP_KEYS); + } + + #[test] + fn aes256_key_expansion() { + let ek = expand_key(&AES256_KEY); + assert_eq!(store_expanded_keys(ek), AES256_EXP_KEYS); + } + + #[test] + fn aes128_encrypt() { + // Intentionally misaligned block + let mut block = [0u8; 19]; + block[3..].copy_from_slice(&INPUT); + + unsafe { + encrypt( + &load_expanded_keys(AES128_EXP_KEYS), + (&mut block[3..]).try_into().unwrap(), + ) + }; + + assert_eq!(&block[3..], &EXPECTED); + } + + #[test] + fn aes128_encrypt8() { + let mut blocks = ParBlocks::default(); + + for block in &mut blocks { + block.copy_from_slice(&INPUT); + } + + unsafe { encrypt8(&load_expanded_keys(AES128_EXP_KEYS), &mut blocks) }; + + for block in &blocks { + assert_eq!(block.as_slice(), &EXPECTED); + } + } + + #[test] + fn aes128_decrypt() { + // Intentionally misaligned block + let mut block = [0u8; 19]; + block[3..].copy_from_slice(&EXPECTED); + + unsafe { + decrypt( + &load_expanded_keys(AES128_EXP_INVKEYS), + (&mut block[3..]).try_into().unwrap(), + ) + }; + + assert_eq!(&block[3..], &INPUT); + } + + #[test] + fn aes128_decrypt8() { + let mut blocks = ParBlocks::default(); + + for block in &mut blocks { + block.copy_from_slice(&EXPECTED); + } + + unsafe { decrypt8(&load_expanded_keys(AES128_EXP_INVKEYS), &mut blocks) }; + + for block in &blocks { + assert_eq!(block.as_slice(), &INPUT); + } + } +} diff --git a/aes/src/autodetect.rs b/aes/src/autodetect.rs index 2a0381a2a..dbbdeabf6 100644 --- a/aes/src/autodetect.rs +++ b/aes/src/autodetect.rs @@ -1,7 +1,7 @@ //! Autodetection support for hardware accelerated AES backends with fallback //! to the fixsliced "soft" implementation. -use crate::{Block, ParBlocks}; +use crate::{soft, Block, ParBlocks}; use cipher::{ consts::{U16, U24, U32, U8}, generic_array::GenericArray, @@ -9,7 +9,13 @@ use cipher::{ }; use core::mem::ManuallyDrop; -cpufeatures::new!(aes_cpuid, "aes"); +#[cfg(all(target_arch = "aarch64", feature = "armv8"))] +use crate::armv8 as intrinsics; + +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +use crate::ni as intrinsics; + +cpufeatures::new!(aes_intrinsics, "aes"); macro_rules! define_aes_impl { ( @@ -21,15 +27,16 @@ macro_rules! define_aes_impl { #[doc=$doc] pub struct $name { inner: $module::Inner, - token: aes_cpuid::InitToken, + token: aes_intrinsics::InitToken, } mod $module { + use super::{intrinsics, soft}; use core::mem::ManuallyDrop; pub(super) union Inner { - pub(super) ni: ManuallyDrop, - pub(super) soft: ManuallyDrop, + pub(super) intrinsics: ManuallyDrop, + pub(super) soft: ManuallyDrop, } } @@ -38,15 +45,15 @@ macro_rules! define_aes_impl { #[inline] fn new(key: &GenericArray) -> Self { - let (token, aesni_present) = aes_cpuid::init_get(); + let (token, aesni_present) = aes_intrinsics::init_get(); let inner = if aesni_present { $module::Inner { - ni: ManuallyDrop::new(crate::ni::$name::new(key)), + intrinsics: ManuallyDrop::new(intrinsics::$name::new(key)), } } else { $module::Inner { - soft: ManuallyDrop::new(crate::soft::$name::new(key)), + soft: ManuallyDrop::new(soft::$name::new(key)), } }; @@ -58,7 +65,7 @@ macro_rules! define_aes_impl { fn clone(&self) -> Self { let inner = if self.token.get() { $module::Inner { - ni: unsafe { self.inner.ni.clone() }, + intrinsics: unsafe { self.inner.intrinsics.clone() }, } } else { $module::Inner { @@ -82,7 +89,7 @@ macro_rules! define_aes_impl { #[inline] fn encrypt_block(&self, block: &mut Block) { if self.token.get() { - unsafe { self.inner.ni.encrypt_block(block) } + unsafe { self.inner.intrinsics.encrypt_block(block) } } else { unsafe { self.inner.soft.encrypt_block(block) } } @@ -91,7 +98,7 @@ macro_rules! define_aes_impl { #[inline] fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) { if self.token.get() { - unsafe { self.inner.ni.encrypt_par_blocks(blocks) } + unsafe { self.inner.intrinsics.encrypt_par_blocks(blocks) } } else { unsafe { self.inner.soft.encrypt_par_blocks(blocks) } } @@ -102,7 +109,7 @@ macro_rules! define_aes_impl { #[inline] fn decrypt_block(&self, block: &mut Block) { if self.token.get() { - unsafe { self.inner.ni.decrypt_block(block) } + unsafe { self.inner.intrinsics.decrypt_block(block) } } else { unsafe { self.inner.soft.decrypt_block(block) } } @@ -111,7 +118,7 @@ macro_rules! define_aes_impl { #[inline] fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) { if self.token.get() { - unsafe { self.inner.ni.decrypt_par_blocks(blocks) } + unsafe { self.inner.intrinsics.decrypt_par_blocks(blocks) } } else { unsafe { self.inner.soft.decrypt_par_blocks(blocks) } } @@ -126,9 +133,24 @@ define_aes_impl!(Aes128, aes128, U16, "AES-128 block cipher instance"); define_aes_impl!(Aes192, aes192, U24, "AES-192 block cipher instance"); define_aes_impl!(Aes256, aes256, U32, "AES-256 block cipher instance"); -#[cfg(feature = "ctr")] +#[cfg(all(feature = "ctr", target_arch = "aarch64"))] +pub(crate) mod ctr { + use super::{Aes128, Aes192, Aes256}; + + /// AES-128 in CTR mode + pub type Aes128Ctr = ::ctr::Ctr64BE; + + /// AES-192 in CTR mode + pub type Aes192Ctr = ::ctr::Ctr64BE; + + /// AES-256 in CTR mode + pub type Aes256Ctr = ::ctr::Ctr64BE; +} + +#[cfg(all(feature = "ctr", any(target_arch = "x86_64", target_arch = "x86")))] pub(crate) mod ctr { use super::{Aes128, Aes192, Aes256}; + use crate::{ni, soft}; use cipher::{ errors::{LoopError, OverflowError}, generic_array::GenericArray, @@ -153,11 +175,12 @@ pub(crate) mod ctr { } mod $module { + use crate::{ni, soft}; use core::mem::ManuallyDrop; pub(super) union Inner { - pub(super) ni: ManuallyDrop, - pub(super) soft: ManuallyDrop, + pub(super) ni: ManuallyDrop, + pub(super) soft: ManuallyDrop, } } @@ -172,8 +195,8 @@ pub(crate) mod ctr { let (token, aesni_present) = aes_ssse3_cpuid::init_get(); let inner = if aesni_present { - let ni = crate::ni::$name::from_block_cipher( - unsafe { (*cipher.inner.ni).clone() }, + let ni = ni::$name::from_block_cipher( + unsafe { (*cipher.inner.intrinsics).clone() }, nonce, ); @@ -181,7 +204,7 @@ pub(crate) mod ctr { ni: ManuallyDrop::new(ni), } } else { - let soft = crate::soft::$name::from_block_cipher( + let soft = soft::$name::from_block_cipher( unsafe { (*cipher.inner.soft).clone() }, nonce, ); diff --git a/aes/src/lib.rs b/aes/src/lib.rs index a2b2dbfb8..e2bf7194e 100644 --- a/aes/src/lib.rs +++ b/aes/src/lib.rs @@ -2,7 +2,6 @@ //! (a.k.a. Rijndael) //! //! # Supported platforms -//! //! This crate provides two different backends based on what target features //! are available: //! @@ -14,6 +13,18 @@ //! architectures with `target-feature=+aes`, as well as an accelerated //! AES-CTR implementation with `target-feature=+aes,+ssse3` //! +//! ## ARMv8 intrinsics (nightly-only) +//! On `aarch64` targets including `aarch64-apple-darwin` (Apple M1) and Linux +//! targets such as `aarch64-unknown-linux-gnu` and `aarch64-unknown-linux-musl`, +//! support for using AES intrinsics provided by the ARMv8 Cryptography Extensions +//! is available when using the nightly compiler, and can be enabled using the +//! `armv8` crate feature. +//! +//! On Linux and macOS, when the `armv8` feature is enabled support for AES +//! intrinsics is autodetected at runtime. On other platforms the `crypto` +//! target feature must be enabled via RUSTFLAGS. +//! +//! ## `x86`/`x86_64` intrinsics (AES-NI) //! By default this crate uses runtime detection on `i686`/`x86_64` targets //! in order to determine if AES-NI is available, and if it is not, it will //! fallback to using a constant-time software implementation. @@ -64,6 +75,7 @@ #![no_std] #![cfg_attr(docsrs, feature(doc_cfg))] +#![cfg_attr(feature = "armv8", feature(stdsimd, aarch64_target_feature))] #![doc( html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg", html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg" @@ -73,7 +85,16 @@ use cfg_if::cfg_if; cfg_if! { - if #[cfg(all( + if #[cfg(all(target_arch = "aarch64", feature = "armv8", not(feature = "force-soft")))] { + mod armv8; + mod autodetect; + mod soft; + + pub use autodetect::{Aes128, Aes192, Aes256}; + + #[cfg(feature = "ctr")] + pub use autodetect::ctr::{Aes128Ctr, Aes192Ctr, Aes256Ctr}; + } else if #[cfg(all( any(target_arch = "x86_64", target_arch = "x86"), not(feature = "force-soft") ))] { diff --git a/aes/src/ni/aes128.rs b/aes/src/ni/aes128.rs index 16f293431..f079fdd11 100644 --- a/aes/src/ni/aes128.rs +++ b/aes/src/ni/aes128.rs @@ -1,10 +1,8 @@ use super::{ arch::*, - utils::{ - aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, Block128, Block128x8, - U128x8, - }, + utils::{aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, U128x8}, }; +use crate::{Block, ParBlocks}; use cipher::{ consts::{U16, U8}, generic_array::GenericArray, @@ -91,7 +89,7 @@ impl BlockCipher for Aes128 { impl BlockEncrypt for Aes128 { #[inline] - fn encrypt_block(&self, block: &mut Block128) { + fn encrypt_block(&self, block: &mut Block) { // Safety: `loadu` and `storeu` support unaligned access #[allow(clippy::cast_ptr_alignment)] unsafe { @@ -102,7 +100,7 @@ impl BlockEncrypt for Aes128 { } #[inline] - fn encrypt_par_blocks(&self, blocks: &mut Block128x8) { + fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) { let b = self.encrypt8(load8(blocks)); store8(blocks, b); } @@ -110,10 +108,10 @@ impl BlockEncrypt for Aes128 { impl BlockDecrypt for Aes128 { #[inline] - fn decrypt_block(&self, block: &mut Block128) { + fn decrypt_block(&self, block: &mut Block) { #[inline] #[target_feature(enable = "aes")] - unsafe fn aes128_decrypt1(block: &mut Block128, keys: &RoundKeys) { + unsafe fn aes128_decrypt1(block: &mut Block, keys: &RoundKeys) { // Safety: `loadu` and `storeu` support unaligned access #[allow(clippy::cast_ptr_alignment)] let mut b = _mm_loadu_si128(block.as_ptr() as *const __m128i); @@ -139,10 +137,10 @@ impl BlockDecrypt for Aes128 { } #[inline] - fn decrypt_par_blocks(&self, blocks: &mut Block128x8) { + fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) { #[inline] #[target_feature(enable = "aes")] - unsafe fn aes128_decrypt8(blocks: &mut Block128x8, keys: &RoundKeys) { + unsafe fn aes128_decrypt8(blocks: &mut ParBlocks, keys: &RoundKeys) { let mut b = load8(blocks); xor8(&mut b, keys[10]); aesdec8(&mut b, keys[9]); diff --git a/aes/src/ni/aes192.rs b/aes/src/ni/aes192.rs index 6ca30f058..fb6428900 100644 --- a/aes/src/ni/aes192.rs +++ b/aes/src/ni/aes192.rs @@ -1,10 +1,8 @@ use super::{ arch::*, - utils::{ - aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, Block128, Block128x8, - U128x8, - }, + utils::{aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, U128x8}, }; +use crate::{Block, ParBlocks}; use cipher::{ consts::{U16, U24, U8}, generic_array::GenericArray, @@ -93,7 +91,7 @@ impl BlockCipher for Aes192 { impl BlockEncrypt for Aes192 { #[inline] - fn encrypt_block(&self, block: &mut Block128) { + fn encrypt_block(&self, block: &mut Block) { // Safety: `loadu` and `storeu` support unaligned access #[allow(clippy::cast_ptr_alignment)] unsafe { @@ -104,7 +102,7 @@ impl BlockEncrypt for Aes192 { } #[inline] - fn encrypt_par_blocks(&self, blocks: &mut Block128x8) { + fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) { let b = self.encrypt8(load8(blocks)); store8(blocks, b); } @@ -112,10 +110,10 @@ impl BlockEncrypt for Aes192 { impl BlockDecrypt for Aes192 { #[inline] - fn decrypt_block(&self, block: &mut Block128) { + fn decrypt_block(&self, block: &mut Block) { #[inline] #[target_feature(enable = "aes")] - unsafe fn aes192_decrypt1(block: &mut Block128, keys: &RoundKeys) { + unsafe fn aes192_decrypt1(block: &mut Block, keys: &RoundKeys) { // Safety: `loadu` and `storeu` support unaligned access #[allow(clippy::cast_ptr_alignment)] let mut b = _mm_loadu_si128(block.as_ptr() as *const __m128i); @@ -143,10 +141,10 @@ impl BlockDecrypt for Aes192 { } #[inline] - fn decrypt_par_blocks(&self, blocks: &mut Block128x8) { + fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) { #[inline] #[target_feature(enable = "aes")] - unsafe fn aes192_decrypt8(blocks: &mut Block128x8, keys: &RoundKeys) { + unsafe fn aes192_decrypt8(blocks: &mut ParBlocks, keys: &RoundKeys) { let mut b = load8(blocks); xor8(&mut b, keys[12]); aesdec8(&mut b, keys[11]); diff --git a/aes/src/ni/aes256.rs b/aes/src/ni/aes256.rs index a4170ad81..9a752c164 100644 --- a/aes/src/ni/aes256.rs +++ b/aes/src/ni/aes256.rs @@ -1,10 +1,8 @@ use super::{ arch::*, - utils::{ - aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, Block128, Block128x8, - U128x8, - }, + utils::{aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, U128x8}, }; +use crate::{Block, ParBlocks}; use cipher::{ consts::{U16, U32, U8}, generic_array::GenericArray, @@ -97,7 +95,7 @@ impl BlockCipher for Aes256 { impl BlockEncrypt for Aes256 { #[inline] - fn encrypt_block(&self, block: &mut Block128) { + fn encrypt_block(&self, block: &mut Block) { // Safety: `loadu` and `storeu` support unaligned access #[allow(clippy::cast_ptr_alignment)] unsafe { @@ -108,7 +106,7 @@ impl BlockEncrypt for Aes256 { } #[inline] - fn encrypt_par_blocks(&self, blocks: &mut Block128x8) { + fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) { let b = self.encrypt8(load8(blocks)); store8(blocks, b); } @@ -116,10 +114,10 @@ impl BlockEncrypt for Aes256 { impl BlockDecrypt for Aes256 { #[inline] - fn decrypt_block(&self, block: &mut Block128) { + fn decrypt_block(&self, block: &mut Block) { #[inline] #[target_feature(enable = "aes")] - unsafe fn aes256_decrypt1(block: &mut Block128, keys: &RoundKeys) { + unsafe fn aes256_decrypt1(block: &mut Block, keys: &RoundKeys) { // Safety: `loadu` and `storeu` support unaligned access #[allow(clippy::cast_ptr_alignment)] let mut b = _mm_loadu_si128(block.as_ptr() as *const __m128i); @@ -149,10 +147,10 @@ impl BlockDecrypt for Aes256 { } #[inline] - fn decrypt_par_blocks(&self, blocks: &mut Block128x8) { + fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) { #[inline] #[target_feature(enable = "aes")] - unsafe fn aes256_decrypt8(blocks: &mut Block128x8, keys: &RoundKeys) { + unsafe fn aes256_decrypt8(blocks: &mut ParBlocks, keys: &RoundKeys) { let mut b = load8(blocks); xor8(&mut b, keys[14]); aesdec8(&mut b, keys[13]); diff --git a/aes/src/ni/utils.rs b/aes/src/ni/utils.rs index c24d98ce5..1fc3403dc 100644 --- a/aes/src/ni/utils.rs +++ b/aes/src/ni/utils.rs @@ -4,13 +4,8 @@ #![allow(clippy::needless_range_loop)] use super::arch::*; -use cipher::{ - consts::{U16, U8}, - generic_array::GenericArray, -}; +use crate::ParBlocks; -pub type Block128 = GenericArray; -pub type Block128x8 = GenericArray, U8>; pub type U128x8 = [__m128i; 8]; #[cfg(test)] @@ -23,7 +18,7 @@ pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) { } #[inline(always)] -pub(crate) fn load8(blocks: &Block128x8) -> U128x8 { +pub(crate) fn load8(blocks: &ParBlocks) -> U128x8 { unsafe { [ _mm_loadu_si128(blocks[0].as_ptr() as *const __m128i), @@ -39,7 +34,7 @@ pub(crate) fn load8(blocks: &Block128x8) -> U128x8 { } #[inline(always)] -pub(crate) fn store8(blocks: &mut Block128x8, b: U128x8) { +pub(crate) fn store8(blocks: &mut ParBlocks, b: U128x8) { unsafe { _mm_storeu_si128(blocks[0].as_mut_ptr() as *mut __m128i, b[0]); _mm_storeu_si128(blocks[1].as_mut_ptr() as *mut __m128i, b[1]);