From c01e7a9c067acee403e87a087dc95becb255c7d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 2 Oct 2023 20:53:29 +0200 Subject: [PATCH] Implement `llvm.x86.aesni.*` intrinsics --- Cargo.lock | 72 +++++++ Cargo.toml | 1 + src/shims/x86/aesni.rs | 168 +++++++++++++++ src/shims/x86/mod.rs | 7 + tests/pass/intrinsics-x86-aes-vaes.rs | 291 ++++++++++++++++++++++++++ 5 files changed, 539 insertions(+) create mode 100644 src/shims/x86/aesni.rs create mode 100644 tests/pass/intrinsics-x86-aes-vaes.rs diff --git a/Cargo.lock b/Cargo.lock index e654932255..09f58a38dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aes" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "aho-corasick" version = "1.1.1" @@ -142,6 +153,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "color-eyre" version = "0.6.2" @@ -199,6 +220,15 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -218,6 +248,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "ctrlc" version = "3.4.1" @@ -284,6 +324,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -332,6 +382,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + [[package]] name = "instant" version = "0.1.12" @@ -469,6 +528,7 @@ dependencies = [ name = "miri" version = "0.1.0" dependencies = [ + "aes", "colored", "ctrlc", "env_logger", @@ -909,6 +969,12 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "ui_test" version = "0.21.2" @@ -954,6 +1020,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index c911a153c1..f8e507a11b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ env_logger = "0.10" log = "0.4" rand = "0.8" smallvec = "1.7" +aes = { version = "0.8.3", features = ["hazmat"] } measureme = "10.0.0" ctrlc = "3.2.5" diff --git a/src/shims/x86/aesni.rs b/src/shims/x86/aesni.rs new file mode 100644 index 0000000000..aef930595b --- /dev/null +++ b/src/shims/x86/aesni.rs @@ -0,0 +1,168 @@ +use rustc_middle::ty::layout::LayoutOf as _; +use rustc_middle::ty::Ty; +use rustc_span::Symbol; +use rustc_target::spec::abi::Abi; + +use crate::*; +use shims::foreign_items::EmulateForeignItemResult; + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ + fn emulate_x86_aesni_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx, Provenance>], + dest: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx, EmulateForeignItemResult> { + let this = self.eval_context_mut(); + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.aesni.").unwrap(); + + match unprefixed_name { + // Used to implement the _mm_aesdec_si128, _mm256_aesdec_epi128 + // and _mm512_aesdec_epi128 functions. + // Performs one round of an AES decryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128 + "aesdec" | "aesdec.256" | "aesdec.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + let key = aes::Block::from(key.to_le_bytes()); + let mut state = aes::Block::from(state.to_le_bytes()); + // `aes::hazmat::equiv_inv_cipher_round` documentation states that + // it performs the same operation as the x86 aesdec instruction. + aes::hazmat::equiv_inv_cipher_round(&mut state, &key); + u128::from_le_bytes(state.into()) + })?; + } + // Used to implement the _mm_aesdeclast_si128, _mm256_aesdeclast_epi128 + // and _mm512_aesdeclast_epi128 functions. + // Performs last round of an AES decryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128 + "aesdeclast" | "aesdeclast.256" | "aesdeclast.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + let mut state = aes::Block::from(state.to_le_bytes()); + // `aes::hazmat::equiv_inv_cipher_round` does the following operations: + // state = InvShiftRows(state) + // state = InvSubBytes(state) + // state = InvMixColumns(state) + // state = state ^ key + // But we need to skip the InvMixColumns. + // First, use a zeroed key to skip the XOR. + aes::hazmat::equiv_inv_cipher_round(&mut state, &aes::Block::from([0; 16])); + // Then, undo the InvMixColumns with MixColumns. + aes::hazmat::mix_columns(&mut state); + // Finally, do the XOR. + u128::from_le_bytes(state.into()) ^ key + })?; + } + // Used to implement the _mm_aesenc_si128, _mm256_aesenc_epi128 + // and _mm512_aesenc_epi128 functions. + // Performs one round of an AES encryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128 + "aesenc" | "aesenc.256" | "aesenc.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + let key = aes::Block::from(key.to_le_bytes()); + let mut state = aes::Block::from(state.to_le_bytes()); + // `aes::hazmat::cipher_round` documentation states that + // it performs the same operation as the x86 aesenc instruction. + aes::hazmat::cipher_round(&mut state, &key); + u128::from_le_bytes(state.into()) + })?; + } + // Used to implement the _mm_aesenclast_si128, _mm256_aesenclast_epi128 + // and _mm512_aesenclast_epi128 functions. + // Performs last round of an AES encryption on each 128-bit word of + // `state` with the corresponding 128-bit key of `key`. + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128 + "aesenclast" | "aesenclast.256" | "aesenclast.512" => { + let [state, key] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + aes_round(this, state, key, dest, |state, key| { + let mut state = aes::Block::from(state.to_le_bytes()); + // `aes::hazmat::cipher_round` does the following operations: + // state = ShiftRows(state) + // state = SubBytes(state) + // state = MixColumns(state) + // state = state ^ key + // But we need to skip the MixColumns. + // First, use a zeroed key to skip the XOR. + aes::hazmat::cipher_round(&mut state, &aes::Block::from([0; 16])); + // Then, undo the MixColumns with InvMixColumns. + aes::hazmat::inv_mix_columns(&mut state); + // Finally, do the XOR. + u128::from_le_bytes(state.into()) ^ key + })?; + } + // Used to implement the _mm_aesimc_si128 function. + // Performs the AES InvMixColumns operation on `op` + "aesimc" => { + let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + // Transmute to `u128` + let op = op.transmute(this.machine.layouts.u128, this)?; + let dest = dest.transmute(this.machine.layouts.u128, this)?; + + let state = this.read_scalar(&op)?.to_u128()?; + let mut state = aes::Block::from(state.to_le_bytes()); + aes::hazmat::inv_mix_columns(&mut state); + + this.write_scalar(Scalar::from_u128(u128::from_le_bytes(state.into())), &dest)?; + } + // TODO: Implement the `llvm.x86.aesni.aeskeygenassist` when possible + // with an external crate. + _ => return Ok(EmulateForeignItemResult::NotSupported), + } + Ok(EmulateForeignItemResult::NeedsJumping) + } +} + +// Performs an AES round (given by `f`) on each 128-bit word of +// `state` with the corresponding 128-bit key of `key`. +fn aes_round<'tcx>( + this: &mut crate::MiriInterpCx<'_, 'tcx>, + state: &OpTy<'tcx, Provenance>, + key: &OpTy<'tcx, Provenance>, + dest: &PlaceTy<'tcx, Provenance>, + f: impl Fn(u128, u128) -> u128, +) -> InterpResult<'tcx, ()> { + assert_eq!(dest.layout.size, state.layout.size); + assert_eq!(dest.layout.size, key.layout.size); + + // Transmute arguments to arrays of `u128`. + assert_eq!(dest.layout.size.bytes() % 16, 0); + let len = dest.layout.size.bytes() / 16; + + let u128_array_layout = + this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, len))?; + + let state = state.transmute(u128_array_layout, this)?; + let key = key.transmute(u128_array_layout, this)?; + let dest = dest.transmute(u128_array_layout, this)?; + + for i in 0..len { + let state = this.read_scalar(&this.project_index(&state, i)?)?.to_u128()?; + let key = this.read_scalar(&this.project_index(&key, i)?)?.to_u128()?; + let dest = this.project_index(&dest, i)?; + + let res = f(state, key); + + this.write_scalar(Scalar::from_u128(res), &dest)?; + } + + Ok(()) +} diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs index 53a4a1ef28..394c955e4c 100644 --- a/src/shims/x86/mod.rs +++ b/src/shims/x86/mod.rs @@ -7,6 +7,7 @@ use crate::*; use helpers::bool_to_simd_element; use shims::foreign_items::EmulateForeignItemResult; +mod aesni; mod sse; mod sse2; mod sse3; @@ -100,6 +101,12 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this, link_name, abi, args, dest, ); } + name if name.starts_with("aesni.") => { + return aesni::EvalContextExt::emulate_x86_aesni_intrinsic( + this, link_name, abi, args, dest, + ); + } + _ => return Ok(EmulateForeignItemResult::NotSupported), } Ok(EmulateForeignItemResult::NeedsJumping) diff --git a/tests/pass/intrinsics-x86-aes-vaes.rs b/tests/pass/intrinsics-x86-aes-vaes.rs new file mode 100644 index 0000000000..090b1db0af --- /dev/null +++ b/tests/pass/intrinsics-x86-aes-vaes.rs @@ -0,0 +1,291 @@ +// Ignore everything except x86 and x86_64 +// Any additional target are added to CI should be ignored here +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+aes,+vaes,+avx512f + +#![feature(avx512_target_feature, stdsimd)] + +use core::mem::transmute; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +fn main() { + assert!(is_x86_feature_detected!("aes")); + assert!(is_x86_feature_detected!("vaes")); + assert!(is_x86_feature_detected!("avx512f")); + + unsafe { + test_aes(); + test_vaes(); + } +} + +// The constants in the tests below are just bit patterns. They should not +// be interpreted as integers; signedness does not make sense for them, but +// __m128i happens to be defined in terms of signed integers. +#[allow(overflowing_literals)] +#[target_feature(enable = "aes")] +unsafe fn test_aes() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/aes.rs + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesdec_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee); + let r = _mm_aesdec_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesdec_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesdeclast_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493); + let r = _mm_aesdeclast_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesdeclast_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesenc_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333); + let r = _mm_aesenc_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesenc_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesenclast_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8); + let r = _mm_aesenclast_si128(a, k); + assert_eq_m128i(r, e); + } + test_mm_aesenclast_si128(); + + #[target_feature(enable = "aes")] + unsafe fn test_mm_aesimc_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055); + let r = _mm_aesimc_si128(a); + assert_eq_m128i(r, e); + } + test_mm_aesimc_si128(); +} + +// The constants in the tests below are just bit patterns. They should not +// be interpreted as integers; signedness does not make sense for them, but +// __m128i happens to be defined in terms of signed integers. +#[allow(overflowing_literals)] +#[target_feature(enable = "vaes,avx512f")] +unsafe fn test_vaes() { + #[target_feature(enable = "avx")] + unsafe fn get_a256() -> __m256i { + // Constants are random + _mm256_set_epi64x( + 0xb89f43a558d3cd51, + 0x57b3e81e369bd603, + 0xf177a1a626933fd6, + 0x50d8adbed1a2f9d7, + ) + } + #[target_feature(enable = "avx")] + unsafe fn get_k256() -> __m256i { + // Constants are random + _mm256_set_epi64x( + 0x503ff704588b5627, + 0xe23d882ed9c3c146, + 0x2785e5b670155b3c, + 0xa750718e183549ff, + ) + } + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesdec_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesdec_epi128(a, k); + + // Check results. + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesdec_epi128(); + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesdeclast_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesdeclast_epi128(a, k); + + // Check results. + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesdeclast_epi128(); + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesenc_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesenc_epi128(a, k); + + // Check results. + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesenc_epi128(); + + #[target_feature(enable = "vaes")] + unsafe fn test_mm256_aesenclast_epi128() { + let a = get_a256(); + let k = get_k256(); + let r = _mm256_aesenclast_epi128(a, k); + + // Check results. + let a: [u128; 2] = transmute(a); + let k: [u128; 2] = transmute(k); + let r: [u128; 2] = transmute(r); + for i in 0..2 { + let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm256_aesenclast_epi128(); + + #[target_feature(enable = "avx512f")] + unsafe fn get_a512() -> __m512i { + // Constants are random + _mm512_set_epi64( + 0xb89f43a558d3cd51, + 0x57b3e81e369bd603, + 0xf177a1a626933fd6, + 0x50d8adbed1a2f9d7, + 0xfbfee3116629db78, + 0x6aef4a91f2ad50f4, + 0x4258bb51ff1d476d, + 0x31da65761c8016cf, + ) + } + #[target_feature(enable = "avx512f")] + unsafe fn get_k512() -> __m512i { + // Constants are random + _mm512_set_epi64( + 0x503ff704588b5627, + 0xe23d882ed9c3c146, + 0x2785e5b670155b3c, + 0xa750718e183549ff, + 0xdfb408830a65d3d9, + 0x0de3d92adac81b0a, + 0xed2741fe12877cae, + 0x3251ddb5404e0974, + ) + } + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesdec_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesdec_epi128(a, k); + + // Check results. + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesdec_epi128(); + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesdeclast_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesdeclast_epi128(a, k); + + // Check results. + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesdeclast_epi128(); + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesenc_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesenc_epi128(a, k); + + // Check results. + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesenc_epi128(); + + #[target_feature(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesenclast_epi128() { + let a = get_a512(); + let k = get_k512(); + let r = _mm512_aesenclast_epi128(a, k); + + // Check results. + let a: [u128; 4] = transmute(a); + let k: [u128; 4] = transmute(k); + let r: [u128; 4] = transmute(r); + for i in 0..4 { + let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i]))); + assert_eq!(r[i], e); + } + } + test_mm512_aesenclast_epi128(); +} + +#[track_caller] +#[target_feature(enable = "sse2")] +unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +}