From 18baa9a22fe0469974aa3eb94d0dc7d123516e3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 2 Oct 2023 20:53:29 +0200 Subject: [PATCH] Implement `llvm.x86.aesni.*` intrinsics --- src/shims/x86/aesni.rs | 384 ++++++++++++++++++++++++++ src/shims/x86/mod.rs | 7 + tests/pass/intrinsics-x86-aes-vaes.rs | 292 ++++++++++++++++++++ 3 files changed, 683 insertions(+) create mode 100644 src/shims/x86/aesni.rs create mode 100644 tests/pass/intrinsics-x86-aes-vaes.rs diff --git a/src/shims/x86/aesni.rs b/src/shims/x86/aesni.rs new file mode 100644 index 0000000000..efb193cbd9 --- /dev/null +++ b/src/shims/x86/aesni.rs @@ -0,0 +1,384 @@ +use rustc_middle::ty::layout::LayoutOf as _; +use rustc_middle::ty::Ty; +use rustc_span::Symbol; +use rustc_target::spec::abi::Abi; + +use crate::*; +use shims::foreign_items::EmulateByNameResult; + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ + fn emulate_x86_aesni_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx, Provenance>], + dest: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> { + let this = self.eval_context_mut(); + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.aesni.").unwrap(); + + match unprefixed_name { + // Used to implement the _mm_aesdec_si128, _mm256_aesdec_epi128 + // and _mm512_aesdec_epi128 functions. + // Performs one round of an AES decryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + "aesdec" | "aesdec.256" | "aesdec.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + // As described in + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128 + let state = state.to_le_bytes(); + let state = inv_shift_rows(state); + let state = inv_sub_bytes(state); + let state = inv_mix_columns(state); + let state = u128::from_le_bytes(state); + state ^ key + })?; + } + // Used to implement the _mm_aesdeclast_si128, _mm256_aesdeclast_epi128 + // and _mm512_aesdeclast_epi128 functions. + // Performs last round of an AES decryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + "aesdeclast" | "aesdeclast.256" | "aesdeclast.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + // As described in + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128 + let state = state.to_le_bytes(); + let state = inv_shift_rows(state); + let state = inv_sub_bytes(state); + let state = u128::from_le_bytes(state); + state ^ key + })?; + } + // Used to implement the _mm_aesenc_si128, _mm256_aesenc_epi128 + // and _mm512_aesenc_epi128 functions. + // Performs one round of an AES encryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + "aesenc" | "aesenc.256" | "aesenc.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + // As described in + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128 + let state = state.to_le_bytes(); + let state = shift_rows(state); + let state = sub_bytes(state); + let state = mix_columns(state); + let state = u128::from_le_bytes(state); + state ^ key + })?; + } + // Used to implement the _mm_aesenclast_si128, _mm256_aesenclast_epi128 + // and _mm512_aesenclast_epi128 functions. + // Performs last round of an AES encryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + "aesenclast" | "aesenclast.256" | "aesenclast.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + // As described in + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128 + let state = state.to_le_bytes(); + let state = shift_rows(state); + let state = sub_bytes(state); + let state = u128::from_le_bytes(state); + state ^ key + })?; + } + // Used to implement the _mm_aesimc_si128 function. + // Performs the AES InvMixColumns operation on `op` + "aesimc" => { + let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + // Transmute to `u128` + let op = op.transmute(this.machine.layouts.u128, this)?; + let dest = dest.transmute(this.machine.layouts.u128, this)?; + + let op = this.read_scalar(&op)?.to_u128()?; + let res = u128::from_le_bytes(inv_mix_columns(op.to_le_bytes())); + + this.write_scalar(Scalar::from_u128(res), &dest)?; + } + // Used to implement the _mm_aeskeygenassist_si128 function. + // Assist in expanding the AES cipher key by computing steps + // towards generating a round key for encryption cipher using + // data from `op` and an 8-bit round constant `imm`. + "aeskeygenassist" => { + let [op, imm] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + // Transmute to `[u32; 4]` + let u32x4_layout = + this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u32, 4))?; + let op = op.transmute(u32x4_layout, this)?; + let dest = dest.transmute(u32x4_layout, this)?; + + // As described in + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128 + // First and third elements are unused + let x1 = this.read_scalar(&this.project_index(&op, 1)?)?.to_u32()?; + let x3 = this.read_scalar(&this.project_index(&op, 3)?)?.to_u32()?; + let imm = this.read_scalar(imm)?.to_u8()?; + + let rcon = u32::from(imm); + let res0 = sub_word(x1); + let res1 = rot_word(sub_word(x1)) ^ rcon; + let res2 = sub_word(x3); + let res3 = rot_word(sub_word(x3)) ^ rcon; + + this.write_scalar(Scalar::from_u32(res0), &this.project_index(&dest, 0)?)?; + this.write_scalar(Scalar::from_u32(res1), &this.project_index(&dest, 1)?)?; + this.write_scalar(Scalar::from_u32(res2), &this.project_index(&dest, 2)?)?; + this.write_scalar(Scalar::from_u32(res3), &this.project_index(&dest, 3)?)?; + } + _ => return Ok(EmulateByNameResult::NotSupported), + } + Ok(EmulateByNameResult::NeedsJumping) + } +} + +// Performs an AES round (given by `f`) on each 128-bit word of +// `state` with the corresponding 128-bit key of `key`. +fn aes_round<'tcx>( + this: &mut crate::MiriInterpCx<'_, 'tcx>, + state: &OpTy<'tcx, Provenance>, + key: &OpTy<'tcx, Provenance>, + dest: &PlaceTy<'tcx, Provenance>, + f: impl Fn(u128, u128) -> u128, +) -> InterpResult<'tcx, ()> { + assert_eq!(dest.layout.size, state.layout.size); + assert_eq!(dest.layout.size, key.layout.size); + + // Transmute arguments to arrays of `u128`. + assert_eq!(dest.layout.size.bytes() % 16, 0); + let len = dest.layout.size.bytes() / 16; + + let u128_array_layout = + this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, len))?; + + let state = state.transmute(u128_array_layout, this)?; + let key = key.transmute(u128_array_layout, this)?; + let dest = dest.transmute(u128_array_layout, this)?; + + for i in 0..len { + let state = this.read_scalar(&this.project_index(&state, i)?)?.to_u128()?; + let key = this.read_scalar(&this.project_index(&key, i)?)?.to_u128()?; + let dest = this.project_index(&dest, i)?; + + let res = f(state, key); + + this.write_scalar(Scalar::from_u128(res), &dest)?; + } + + Ok(()) +} + +// AES cypher primitives. They are not optimized in any way, they are +// a direct implementation based on the descriptions from "FIPS 197, Advanced +// Encryption Standard (AES)". +// https://csrc.nist.gov/files/pubs/fips/197/final/docs/fips-197.pdf + +// Primitives operate on 4x4 matrices represented with a 16-element array +// in column-major order. + +/// ShiftRows - cyclically shifts the last three rows +#[rustfmt::skip] +fn shift_rows(s: [u8; 16]) -> [u8; 16] { + [ + s[0], s[5], s[10], s[15], + s[4], s[9], s[14], s[3], + s[8], s[13], s[2], s[7], + s[12], s[1], s[6], s[11], + ] +} + +/// InvShiftRows - cyclically inverse shifts the last three rows +#[rustfmt::skip] +fn inv_shift_rows(s: [u8; 16]) -> [u8; 16] { + [ + s[0], s[13], s[10], s[7], + s[4], s[1], s[14], s[11], + s[8], s[5], s[2], s[15], + s[12], s[9], s[6], s[3], + ] +} + +#[rustfmt::skip] + const SBOX: [u8; 256] = [ + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, + ]; + +/// SubBytes - S-box transformation +fn sub_bytes(s: [u8; 16]) -> [u8; 16] { + s.map(|byte| SBOX[usize::from(byte)]) +} + +/// InvSubBytes - inverse S-box transformation +fn inv_sub_bytes(s: [u8; 16]) -> [u8; 16] { + #[rustfmt::skip] + const INV_SBOX: [u8; 256] = [ + 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, + 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, + 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, + 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, + 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, + 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, + 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, + 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, + 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, + 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, + 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, + 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, + 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, + 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, + 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D, + ]; + s.map(|byte| INV_SBOX[usize::from(byte)]) +} + +/// MixColumns - multiplies `MATRIX` by `s` in GF(2^8) +fn mix_columns(s: [u8; 16]) -> [u8; 16] { + #[rustfmt::skip] + const MATRIX: [u8; 16] = [ + 0x02, 0x01, 0x01, 0x03, + 0x03, 0x02, 0x01, 0x01, + 0x01, 0x03, 0x02, 0x01, + 0x01, 0x01, 0x03, 0x02, + ]; + mul_matrix_gf2p8(MATRIX, s) +} + +/// InvMixColumns - multiplies `INV_MATRIX` by `s` in GF(2^8) +fn inv_mix_columns(s: [u8; 16]) -> [u8; 16] { + #[rustfmt::skip] + const INV_MATRIX: [u8; 16] = [ + 0x0E, 0x09, 0x0D, 0x0B, + 0x0B, 0x0E, 0x09, 0x0D, + 0x0D, 0x0B, 0x0E, 0x09, + 0x09, 0x0D, 0x0B, 0x0E, + ]; + mul_matrix_gf2p8(INV_MATRIX, s) +} + +/// Matrix multiplication where each element is a GF(2^8) polynomial +fn mul_matrix_gf2p8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] { + fn index(row: usize, col: usize) -> usize { + // Calculate index of element at (row,col) + // Remember that elements are arranged in column-major order. + col.checked_mul(4).unwrap().checked_add(row).unwrap() + } + + let mut res = [0; 16]; + for row in 0..4 { + for col in 0..4 { + let res = &mut res[index(row, col)]; + *res ^= mul_gf2p8(a[index(row, 0)], b[index(0, col)]); + *res ^= mul_gf2p8(a[index(row, 1)], b[index(1, col)]); + *res ^= mul_gf2p8(a[index(row, 2)], b[index(2, col)]); + *res ^= mul_gf2p8(a[index(row, 3)], b[index(3, col)]); + } + } + res +} + +/// SubWord - S-box transformation on 4 bytes +fn sub_word(w: u32) -> u32 { + u32::from_ne_bytes(w.to_ne_bytes().map(|byte| SBOX[usize::from(byte)])) +} + +/// RotWord - Rotate 32-bit word 1 byte +fn rot_word(w: u32) -> u32 { + w.rotate_right(8) +} + +/// Modular multiplication of GF(2^8) polynomials `a(x)` and `b(x)`. +fn mul_gf2p8(mut a: u8, mut b: u8) -> u8 { + // Irreducible polynomial defined in equation (4.1) of FIPS 197. + const M: u8 = 0b11011; // m(x) = x^8 + x^4 + x^3 + x^1 + 1 + + // Start with res(x) = 0 + let mut res = 0; + for _ in 0..8 { + if (a & 1) == 1 { + // If the constant term of `a(x)` is 1, + // add `b(x)` to `res(x)` + res ^= b; + } + // Shift down degree of `a(x)` by 1 + a >>= 1; + // Shift up degree of `b(x)` by 1 (modulo `m(x)`) + if (b & 0x80) != 0 { + b = (b << 1) ^ M; + } else { + b <<= 1; + } + } + res +} + +#[cfg(test)] +mod tests { + use super::*; + + // Magic values from "Appendix C.1" + const STATE_START: [u8; 16] = 0x00102030405060708090A0B0C0D0E0F0_u128.to_be_bytes(); + const STATE_S_BOX: [u8; 16] = 0x63CAB7040953D051CD60E0E7BA70E18C_u128.to_be_bytes(); + const STATE_S_ROW: [u8; 16] = 0x6353E08C0960E104CD70B751BACAD0E7_u128.to_be_bytes(); + const STATE_M_COL: [u8; 16] = 0x5F72641557F5BC92F7BE3B291DB9F91A_u128.to_be_bytes(); + + #[test] + fn test_shift_rows() { + assert_eq!(shift_rows(STATE_S_BOX), STATE_S_ROW); + } + + #[test] + fn test_inv_shift_rows() { + assert_eq!(inv_shift_rows(STATE_S_ROW), STATE_S_BOX); + } + + #[test] + fn test_sub_bytes() { + assert_eq!(sub_bytes(STATE_START), STATE_S_BOX); + } + + #[test] + fn test_inv_sub_bytes() { + assert_eq!(inv_sub_bytes(STATE_S_BOX), STATE_START); + } + + #[test] + fn test_mix_columns() { + assert_eq!(mix_columns(STATE_S_ROW), STATE_M_COL); + } + + #[test] + fn test_inv_mix_columns() { + assert_eq!(inv_mix_columns(STATE_M_COL), STATE_S_ROW); + } +} diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs index 7c280109cb..6668d1a23b 100644 --- a/src/shims/x86/mod.rs +++ b/src/shims/x86/mod.rs @@ -7,6 +7,7 @@ use crate::*; use helpers::bool_to_simd_element; use shims::foreign_items::EmulateByNameResult; +mod aesni; mod sse; mod sse2; mod sse3; @@ -100,6 +101,12 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this, link_name, abi, args, dest, ); } + name if name.starts_with("aesni.") => { + return aesni::EvalContextExt::emulate_x86_aesni_intrinsic( + this, link_name, abi, args, dest, + ); + } + _ => return Ok(EmulateByNameResult::NotSupported), } Ok(EmulateByNameResult::NeedsJumping) diff --git a/tests/pass/intrinsics-x86-aes-vaes.rs b/tests/pass/intrinsics-x86-aes-vaes.rs new file mode 100644 index 0000000000..2138fe4181 --- /dev/null +++ b/tests/pass/intrinsics-x86-aes-vaes.rs @@ -0,0 +1,292 @@ +// Ignore everything except x86 and x86_64 +// Any additional target are added to CI should be ignored here +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+aes,+vaes,+avx512f + +#![feature(avx512_target_feature, stdsimd)] + +use core::mem::transmute; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +fn main() { + assert!(is_x86_feature_detected!("aes")); + assert!(is_x86_feature_detected!("vaes")); + + unsafe { + test_aes(); + test_vaes(); + } +} + +// The constants in the tests below are just bit patterns. They should not +// be interpreted as integers; signedness does not make sense for them, but +// __m128i happens to be defined in terms of signed integers. +#[allow(overflowing_literals)] +#[target_feature(enable = "aes")] +unsafe fn test_aes() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/aes.rs + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesdec_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee); + let r = _mm_aesdec_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesdec_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesdeclast_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493); + let r = _mm_aesdeclast_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesdeclast_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesenc_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333); + let r = _mm_aesenc_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesenc_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesenclast_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8); + let r = _mm_aesenclast_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesenclast_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesimc_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055); + let r = _mm_aesimc_si128(a); + assert_eq_m128i(r, e); + } + test_mm_aesimc_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aeskeygenassist_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714138.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let e = _mm_set_epi64x(0x857c266b7c266e85, 0xeac4eea9c4eeacea); + let r = _mm_aeskeygenassist_si128::<5>(a); + assert_eq_m128i(r, e); + } + test_mm_aeskeygenassist_si128(); +} + +// The constants in the tests below are just bit patterns. They should not +// be interpreted as integers; signedness does not make sense for them, but +// __m128i happens to be defined in terms of signed integers. +#[allow(overflowing_literals)] +#[target_feature(enable = "vaes,avx512f")] +unsafe fn test_vaes() { + #[target_feature(enable = "avx")] + unsafe fn get_a256() -> __m256i { + // Constants are random + _mm256_set_epi64x( + 0xb89f43a558d3cd51, + 0x57b3e81e369bd603, + 0xf177a1a626933fd6, + 0x50d8adbed1a2f9d7, + ) + } + #[target_feature(enable = "avx")] + unsafe fn get_k256() -> __m256i { + // Constants are random + _mm256_set_epi64x( + 0x503ff704588b5627, + 0xe23d882ed9c3c146, + 0x2785e5b670155b3c, + 0xa750718e183549ff, + ) + } + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesdec_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesdec_epi128(a, k); + + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesdec_epi128(); + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesdeclast_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesdeclast_epi128(a, k); + + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesdeclast_epi128(); + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesenc_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesenc_epi128(a, k); + + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesenc_epi128(); + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesenclast_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesenclast_epi128(a, k); + + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesenclast_epi128(); + + #[target_feature(enable = "avx512f")] + unsafe fn get_a512() -> __m512i { + // Constants are random + _mm512_set_epi64( + 0xb89f43a558d3cd51, + 0x57b3e81e369bd603, + 0xf177a1a626933fd6, + 0x50d8adbed1a2f9d7, + 0xfbfee3116629db78, + 0x6aef4a91f2ad50f4, + 0x4258bb51ff1d476d, + 0x31da65761c8016cf, + ) + } + #[target_feature(enable = "avx512f")] + unsafe fn get_k512() -> __m512i { + // Constants are random + _mm512_set_epi64( + 0x503ff704588b5627, + 0xe23d882ed9c3c146, + 0x2785e5b670155b3c, + 0xa750718e183549ff, + 0xdfb408830a65d3d9, + 0x0de3d92adac81b0a, + 0xed2741fe12877cae, + 0x3251ddb5404e0974, + ) + } + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesdec_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesdec_epi128(a, k); + + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesdec_epi128(); + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesdeclast_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesdeclast_epi128(a, k); + + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesdeclast_epi128(); + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesenc_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesenc_epi128(a, k); + + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesenc_epi128(); + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesenclast_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesenclast_epi128(a, k); + + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesenclast_epi128(); +} + +#[track_caller] +#[target_feature(enable = "sse2")] +unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +}