small perf fix; renaming

flintlib · Jul 15, 2024 · 79b7c89
1 parent 0cd1a02
commit 79b7c89
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 11 deletions.
diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst
@@ -317,11 +317,13 @@ code for reduced overhead.
 Matrix functions
 -------------------------------------------------------------------------------
 
-.. function:: int _nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
-              int _nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+.. function:: int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+              int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
               int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx)
               int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
 
+    Different implementations of matrix multiplication.
+
 Internal functions
 -------------------------------------------------------------------------------
 

diff --git a/src/nfloat.h b/src/nfloat.h
@@ -453,8 +453,8 @@ int _nfloat_vec_submul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat
 int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
 int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
 
-int _nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
-int _nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx);
 int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 

diff --git a/src/nfloat/mat_mul.c b/src/nfloat/mat_mul.c
@@ -396,7 +396,7 @@ _nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksma
 
     extra_bits = Adelta + Bdelta + pad_top + pad_bot;
 
-    if (extra_bits > max_extra_bits)
+    if (extra_bits >= max_extra_bits)
         return gr_mat_mul_classical(C, A, B, ctx);
 
     Aexp = Amax + pad_top;
@@ -408,13 +408,13 @@ _nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksma
 }
 
 int
-_nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
 {
     return _nfloat_mat_mul_fixed(C, A, B, 0, 100000, ctx);
 }
 
 int
-_nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
 {
     return _nfloat_mat_mul_fixed(C, A, B, 1, 100000, ctx);
 }

diff --git a/src/nfloat/test/t-mat_mul.c b/src/nfloat/test/t-mat_mul.c
@@ -41,7 +41,7 @@ TEST_FUNCTION_START(mat_mul, state)
         GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 2, ctx));
 
         gr_mat_test_approx_mul_max_norm(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_waksman,
+            (gr_method_mat_binary_op) nfloat_mat_mul_waksman,
             tol, state, (prec <= 256) ? 10 : 1, 10, ctx);
 
         gr_mat_test_approx_mul_max_norm(
@@ -50,7 +50,7 @@ TEST_FUNCTION_START(mat_mul, state)
                         (prec <= 256) ? 40 : 20, ctx);
 
         gr_mat_test_approx_mul_max_norm(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_fixed_classical,
+            (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical,
             tol, state, (prec <= 256) ? 10 : 1,
                         (prec <= 256) ? 40 : 20, ctx);
 
@@ -77,7 +77,7 @@ TEST_FUNCTION_START(mat_mul, state)
         GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 6, ctx));
 
         gr_mat_test_approx_mul_pos_entrywise_accurate(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_waksman,
+            (gr_method_mat_binary_op) nfloat_mat_mul_waksman,
             tol, state, (prec <= 256) ? 10 : 1, 10, ctx);
 
         gr_mat_test_approx_mul_pos_entrywise_accurate(
@@ -86,7 +86,7 @@ TEST_FUNCTION_START(mat_mul, state)
                         (prec <= 256) ? 40 : 20, ctx);
 
         gr_mat_test_approx_mul_pos_entrywise_accurate(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_fixed_classical,
+            (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical,
             tol, state, (prec <= 256) ? 10 : 1,
                         (prec <= 256) ? 40 : 20, ctx);