diff --git a/matmul/build.sh b/matmul/build.sh index e3ab0170..895062e1 100644 --- a/matmul/build.sh +++ b/matmul/build.sh @@ -3,7 +3,7 @@ go build -o matmul_go matmul.go gccgo -O3 -g -o matmul_go_gccgo matmul.go gcc -O3 -o matmul_c matmul.c scalac -optimize matmul.scala -rustc -C opt-level=3 matmul.rs -o matmul_rs +rustc -C opt-level=3 -C target-cpu=native matmul.rs -o matmul_rs dmd -ofmatmul_d -O -release -inline matmul.d gdc -o matmul_d_gdc -O3 -frelease -finline matmul.d ldc2 -ofmatmul_d_ldc -O5 -release -inline matmul.d diff --git a/matmul/matmul.rs b/matmul/matmul.rs index a33b0efc..fd7d5ba6 100644 --- a/matmul/matmul.rs +++ b/matmul/matmul.rs @@ -1,43 +1,68 @@ -// rustc -C opt-level=3 -C lto - -fn new_mat(x: usize, y: usize) -> Vec> { - vec![vec![0f64; y]; x] -} - fn mat_gen(n: usize) -> Vec> { - let mut m = new_mat(n, n); - let tmp = 1f64 / (n as f64) / (n as f64); + let mut m = vec![vec![0.0; n]; n]; + let k = 1.0 / (n as f64) / (n as f64); for i in 0 .. n { for j in 0 .. n { - m[i][j] = tmp * (i as f64 - j as f64) * (i as f64 + j as f64); + m[i][j] = k * (i as f64 - j as f64) * (i as f64 + j as f64); } } m } #[inline(never)] +fn dot_product(a: &[f64], b: &[f64]) -> f64 { + debug_assert_eq!(a.len(), b.len()); + let size = a.len() as isize; + let mut ap = a.as_ptr(); + let mut bp = b.as_ptr(); + let mut tot = [0.0, 0.0]; + + unsafe { + let end_ptr = ap.offset(size); + const BLOCK_SIZE: isize = 16; + let block_end_ptr = ap.offset(size & !(BLOCK_SIZE - 1)); + + while ap != block_end_ptr { + for i in 0 .. BLOCK_SIZE { + tot[i as usize % 2] += *ap.offset(i) * *bp.offset(i); + } + ap = ap.offset(BLOCK_SIZE); + bp = bp.offset(BLOCK_SIZE); + } + + tot[0] += tot[1]; + + while ap != end_ptr { + tot[0] += *ap * *bp; + ap = ap.offset(1); + bp = bp.offset(1); + } + } + + tot[0] +} + fn mat_mul(a: &[Vec], b: &[Vec]) -> Vec> { let m = a.len(); let n = a[0].len(); let p = b[0].len(); - let mut b2 = new_mat(n, p); - for i in 0 .. n { - for j in 0 .. p { - b2[j][i] = b[i][j]; + let mut c = vec![vec![0.0; p]; n]; + for (i, b_row) in b.iter().enumerate() { + for (j, b_el) in b_row.iter().enumerate() { + c[j][i] = *b_el; } } - let mut c = new_mat(m, p); - - for (i, ci) in c.iter_mut().enumerate() { - for (cij, b2j) in ci.iter_mut().zip(&b2) { - *cij = a[i].iter().zip(b2j).map(|(&x, y)| x * y).sum(); + let mut res = vec![vec![0.0; p]; m]; + for (i, a_row) in a.iter().enumerate() { + for (res_ij, c_row) in res[i].iter_mut().zip(&c) { + *res_ij = dot_product(a_row, c_row); } } - c + res } fn main() { @@ -50,6 +75,5 @@ fn main() { let a = mat_gen(n); let b = mat_gen(n); let c = mat_mul(&a, &b); - println!("{}", c[n / 2][n / 2]); } \ No newline at end of file