Skip to content

Commit

Permalink
Merge commit '806ea64033a52466dd7cae351c1d93da55216c98' into support-…
Browse files Browse the repository at this point in the history
…256bit-vpclmulqdq

Adding 256-bit VPCLMULQDQ support from
tikv#8

# Conflicts:
#	Cargo.toml
  • Loading branch information
onethumb committed Dec 12, 2024
2 parents e6f1852 + 806ea64 commit c38ee6b
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 18 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ rust-version = "1.70.0"

[dependencies]
crc = "3"
lazy_static = { version = "1.4.0", optional = true }

[dev-dependencies]
crc = "3"
Expand All @@ -25,7 +26,7 @@ rand = "0.8"

[features]
pmull = [] # deprecated, no longer have any effect.
vpclmulqdq = []
vpclmulqdq = ["lazy_static"]
fake-simd = []

[[bench]]
Expand Down
103 changes: 86 additions & 17 deletions src/pclmulqdq/x86/vpclmulqdq.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{super::fold_tail, Simd, __cpuid_count, __m256i, _mm256_set_epi64x, _mm256_xor_si256};
use core::ops::BitXor;

use super::{__cpuid_count, __m256i, _mm256_set_epi64x, _mm256_xor_si256, Simd, super::fold_tail};
use lazy_static::lazy_static;

// PCLMULQDQ can be used without avx512vl. However, this is only addressed by rust recently --- so we
// need to manually specify the intrinsic, otherwise rustc will inline it poorly.
Expand All @@ -13,15 +13,21 @@ extern "C" {
#[derive(Clone, Copy, Debug)]
pub struct Simd256(__m256i);

impl Simd256 {
#[inline]
pub fn is_supported() -> bool {
lazy_static! {
static ref VPCLMULQDQ_SUPPORTED : bool = {
let avx2 = is_x86_feature_detected!("avx2");
// Rust is very confused about VPCLMULQDQ
// Let us detect it use CPUID directly
let leaf_7 = unsafe { __cpuid_count(7, 0) };
let vpclmulqdq = (leaf_7.ecx & (1u32 << 10)) != 0;
avx2 && vpclmulqdq
};
}

impl Simd256 {
#[inline]
pub fn is_supported() -> bool {
*VPCLMULQDQ_SUPPORTED
}

#[inline]
Expand Down Expand Up @@ -120,9 +126,24 @@ fn test_size_and_alignment() {
#[test]
fn test_new() {
unsafe {
let x = Simd256::new(0xd7c8_11cf_e5c5_c792, 0x86e6_5c36_e68b_4804, 0xd7c8_11cf_e5c5_c792, 0x86e6_5c36_e68b_4804);
let y = Simd256::new(0xd7c8_11cf_e5c5_c792, 0x86e6_5c36_e68b_4804, 0xd7c8_11cf_e5c5_c792, 0x86e6_5c36_e68b_4804);
let z = Simd256::new(0xfa3e_0099_cd5e_d60d, 0xad71_9ee6_57d1_498e, 0xfa3e_0099_cd5e_d60d, 0xad71_9ee6_57d1_498e);
let x = Simd256::new(
0xd7c8_11cf_e5c5_c792,
0x86e6_5c36_e68b_4804,
0xd7c8_11cf_e5c5_c792,
0x86e6_5c36_e68b_4804,
);
let y = Simd256::new(
0xd7c8_11cf_e5c5_c792,
0x86e6_5c36_e68b_4804,
0xd7c8_11cf_e5c5_c792,
0x86e6_5c36_e68b_4804,
);
let z = Simd256::new(
0xfa3e_0099_cd5e_d60d,
0xad71_9ee6_57d1_498e,
0xfa3e_0099_cd5e_d60d,
0xad71_9ee6_57d1_498e,
);
assert_eq!(x, y);
assert_ne!(x, z);
}
Expand All @@ -132,22 +153,70 @@ fn test_new() {
#[test]
fn test_xor() {
unsafe {
let x = Simd256::new(0xe450_87f9_b031_0d47, 0x3d72_e92a_96c7_4c63, 0xe450_87f9_b031_0d47, 0x3d72_e92a_96c7_4c63);
let y = Simd256::new(0x7ed8_ae0a_dfbd_89c0, 0x1c9b_dfaa_953e_0ef4, 0x7ed8_ae0a_dfbd_89c0, 0x1c9b_dfaa_953e_0ef4);
let x = Simd256::new(
0xe450_87f9_b031_0d47,
0x3d72_e92a_96c7_4c63,
0xe450_87f9_b031_0d47,
0x3d72_e92a_96c7_4c63,
);
let y = Simd256::new(
0x7ed8_ae0a_dfbd_89c0,
0x1c9b_dfaa_953e_0ef4,
0x7ed8_ae0a_dfbd_89c0,
0x1c9b_dfaa_953e_0ef4,
);
let mut z = x ^ y;
assert_eq!(z, Simd256::new(0x9a88_29f3_6f8c_8487, 0x21e9_3680_03f9_4297, 0x9a88_29f3_6f8c_8487, 0x21e9_3680_03f9_4297));
z = z ^ Simd256::new(0x57a2_0f44_c005_b2ea, 0x7056_bde9_9303_aa51, 0x57a2_0f44_c005_b2ea, 0x7056_bde9_9303_aa51);
assert_eq!(z, Simd256::new(0xcd2a_26b7_af89_366d, 0x51bf_8b69_90fa_e8c6, 0xcd2a_26b7_af89_366d, 0x51bf_8b69_90fa_e8c6));
assert_eq!(
z,
Simd256::new(
0x9a88_29f3_6f8c_8487,
0x21e9_3680_03f9_4297,
0x9a88_29f3_6f8c_8487,
0x21e9_3680_03f9_4297
)
);
z = z ^ Simd256::new(
0x57a2_0f44_c005_b2ea,
0x7056_bde9_9303_aa51,
0x57a2_0f44_c005_b2ea,
0x7056_bde9_9303_aa51,
);
assert_eq!(
z,
Simd256::new(
0xcd2a_26b7_af89_366d,
0x51bf_8b69_90fa_e8c6,
0xcd2a_26b7_af89_366d,
0x51bf_8b69_90fa_e8c6
)
);
}
}

#[cfg(all(target_feature = "avx2", target_feature = "avx512vpclmulqdq"))]
#[test]
fn test_fold_32() {
unsafe {
let x = Simd256::new(0xb5f1_2590_5645_0b6c, 0x333a_2c49_c361_9e21, 0xb5f1_2590_5645_0b6c, 0x333a_2c49_c361_9e21);
let f = x.fold_32(Simd256::new(0xbecc_9dd9_038f_c366, 0x5ba9_365b_e2e9_5bf5, 0xbecc_9dd9_038f_c366, 0x5ba9_365b_e2e9_5bf5));
assert_eq!(f, Simd256::new(0x4f55_42df_ef35_1810, 0x0c03_5bd6_70fc_5abd, 0x4f55_42df_ef35_1810, 0x0c03_5bd6_70fc_5abd));
let x = Simd256::new(
0xb5f1_2590_5645_0b6c,
0x333a_2c49_c361_9e21,
0xb5f1_2590_5645_0b6c,
0x333a_2c49_c361_9e21,
);
let f = x.fold_32(Simd256::new(
0xbecc_9dd9_038f_c366,
0x5ba9_365b_e2e9_5bf5,
0xbecc_9dd9_038f_c366,
0x5ba9_365b_e2e9_5bf5,
));
assert_eq!(
f,
Simd256::new(
0x4f55_42df_ef35_1810,
0x0c03_5bd6_70fc_5abd,
0x4f55_42df_ef35_1810,
0x0c03_5bd6_70fc_5abd
)
);
}
}

0 comments on commit c38ee6b

Please sign in to comment.