diff --git a/doc/source/gr.rst b/doc/source/gr.rst
index 15183b3195..79992c8a4d 100644
--- a/doc/source/gr.rst
+++ b/doc/source/gr.rst
@@ -844,6 +844,23 @@ Ordering methods
of *x* is less than, equal or greater than the absolute value of *y*.
This may return ``GR_DOMAIN`` if the ring is not an ordered ring.
+.. function:: truth_t gr_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ truth_t gr_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ truth_t gr_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ truth_t gr_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ truth_t gr_abs_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ truth_t gr_abs_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ truth_t gr_abs_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ truth_t gr_abs_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+
+ Wrappers of ``gr_cmp`` and ``gr_cmpabs`` returning truth values
+ for the comparison operations ``<=``, ``<``, ``>=``, ``>``.
+
+.. function:: int gr_min(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+ int gr_max(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+
+ Minimum and maximum value.
+
Enclosure and interval methods
........................................................................
diff --git a/doc/source/gr_mat.rst b/doc/source/gr_mat.rst
index 9a2ac83670..93d8d2b9fa 100644
--- a/doc/source/gr_mat.rst
+++ b/doc/source/gr_mat.rst
@@ -154,6 +154,7 @@ Assignment and special values
.. function:: int gr_mat_set(gr_mat_t res, const gr_mat_t mat, gr_ctx_t ctx)
int gr_mat_set_fmpz_mat(gr_mat_t res, const fmpz_mat_t mat, gr_ctx_t ctx)
int gr_mat_set_fmpq_mat(gr_mat_t res, const fmpq_mat_t mat, gr_ctx_t ctx)
+ int gr_mat_set_gr_mat_other(gr_mat_t res, const gr_mat_t mat, gr_ctx_t mat_ctx, gr_ctx_t ctx)
Sets *res* to the value of *mat*.
@@ -213,6 +214,58 @@ Basic row, column and entry operations
This predicate is always decidable (even if the underlying ring
is not computable), returning ``T_TRUE`` or ``T_FALSE``.
+Entrywise operations
+-------------------------------------------------------------------------------
+
+.. function:: int gr_mat_entrywise_unary_op(gr_mat_t res, gr_method_unary_op f, const gr_mat_t mat, gr_ctx_t ctx)
+
+ Sets *res* to the application of the function *f* to the
+ entries of matrix *mat*. Returns ``GR_DOMAIN`` if the matrix dimensions do not match.
+
+.. function:: int gr_mat_entrywise_binary_op(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx)
+
+ Sets *res* to the application of the function *f*
+ to the entries of *mat1* as first argument and the entries of *mat2*
+ as second argument.
+ Returns ``GR_DOMAIN`` if the matrix dimensions do not match.
+
+.. function:: int gr_mat_entrywise_binary_op_scalar(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat, gr_srcptr c, gr_ctx_t ctx)
+
+ Sets *res* to the application of the function *f*
+ to the entries of *mat* as first argument and the scalar *c*
+ as second argument.
+ Returns ``GR_DOMAIN`` if the matrix dimensions do not match.
+
+.. function:: truth_t gr_mat_entrywise_unary_predicate_all(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx)
+ truth_t gr_mat_entrywise_unary_predicate_any(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx)
+
+ Returns whether the predicate *f* is true for all entries,
+ respectively for any entry, in the matrix *mat*.
+
+.. function:: truth_t gr_mat_entrywise_binary_predicate_all(gr_method_binary_predicate f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx)
+
+ Returns whether the binary predicate *f* is true for all entries
+ in *mat1* paired with the corresponding entries in *mat2*.
+ Returns ``T_FALSE`` if the matrix dimensions are not compatible.
+
+Norms
+-------------------------------------------------------------------------------
+
+.. function:: int gr_mat_norm_max(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+
+ Max norm: `\max_{i,j} |a_{i,j}|`.
+
+.. function:: int gr_mat_norm_1(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+
+ 1-norm (largest absolute column sum): `\max_{1\le j \le n} \sum_{i=1}^m |a_{i,j}|`.
+
+.. function:: int gr_mat_norm_inf(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+
+ Infinity-norm (largest absolute row sum): `\max_{1\le i \le m} \sum_{j=1}^n |a_{i,j}|`.
+
+.. function:: int gr_mat_norm_frobenius(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+
+ Frobenius norm: `\sqrt{\sum_{i,j} |a_{i,j}|^2}`.
Arithmetic
-------------------------------------------------------------------------------
@@ -802,6 +855,19 @@ on each test iteration, otherwise the given ring is tested.
Tests the given function ``solve_impl`` for correctness as an implementation
of :func:`gr_mat_nonsingular_solve_tril` / :func:`gr_mat_nonsingular_solve_triu`.
+.. function:: void gr_mat_test_approx_mul_max_norm(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx)
+
+ Tests the given implementation of matrix multiplication for accuracy
+ over an approximate numerical ring by checking that
+ `|C-AB| \le |A||B| rel\_tol` holds in the max norm,
+ using classical multiplication for reference.
+
+.. function:: void gr_mat_test_approx_mul_pos_entrywise_accurate(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx)
+
+ Tests the given implementation of matrix multiplication for accuracy
+ over an approximate numerical ring by generating nonnegative matrices
+ and checking that the entrywise relative error compared to
+ classical multiplication does not exceed *rel_tol*.
.. raw:: latex
diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst
index 3f1d82e391..d708a16c6a 100644
--- a/doc/source/nfloat.rst
+++ b/doc/source/nfloat.rst
@@ -314,6 +314,16 @@ code for reduced overhead.
.. function:: int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx)
int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx)
+Matrix functions
+-------------------------------------------------------------------------------
+
+.. function:: int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+ int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+ int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx)
+ int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+
+ Different implementations of matrix multiplication.
+
Internal functions
-------------------------------------------------------------------------------
diff --git a/src/gr.h b/src/gr.h
index 728366c7ac..63cb0d7fcd 100644
--- a/src/gr.h
+++ b/src/gr.h
@@ -1109,6 +1109,25 @@ GR_INLINE WARN_UNUSED_RESULT int gr_cmpabs(int * res, gr_srcptr x, gr_srcptr y,
GR_INLINE WARN_UNUSED_RESULT int gr_cmp_other(int * res, gr_srcptr x, gr_srcptr y, gr_ctx_t y_ctx, gr_ctx_t ctx) { return GR_BINARY_OP_OTHER_GET_INT(ctx, CMP_OTHER)(res, x, y, y_ctx, ctx); }
GR_INLINE WARN_UNUSED_RESULT int gr_cmpabs_other(int * res, gr_srcptr x, gr_srcptr y, gr_ctx_t y_ctx, gr_ctx_t ctx) { return GR_BINARY_OP_OTHER_GET_INT(ctx, CMPABS_OTHER)(res, x, y, y_ctx, ctx); }
+#define __GR_CMP(cfun, expr) \
+ int cmp; \
+ if ((cfun)(&cmp, x, y, ctx) != GR_SUCCESS) \
+ return T_UNKNOWN; \
+ return (expr) ? T_TRUE : T_FALSE; \
+
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp <= 0) }
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp < 0) }
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp >= 0) }
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp > 0) }
+
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp <= 0) }
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp < 0) }
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp >= 0) }
+GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp > 0) }
+
+GR_INLINE WARN_UNUSED_RESULT int gr_min(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { return GR_BINARY_OP(ctx, MIN)(res, x, y, ctx); }
+GR_INLINE WARN_UNUSED_RESULT int gr_max(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { return GR_BINARY_OP(ctx, MAX)(res, x, y, ctx); }
+
GR_INLINE WARN_UNUSED_RESULT int gr_gen(gr_ptr res, gr_ctx_t ctx) { return GR_CONSTANT_OP(ctx, GEN)(res, ctx); }
GR_INLINE WARN_UNUSED_RESULT int gr_gens(gr_vec_t res, gr_ctx_t ctx) { return GR_VEC_CTX_OP(ctx, GENS)(res, ctx); }
GR_INLINE WARN_UNUSED_RESULT int gr_gens_recursive(gr_vec_t res, gr_ctx_t ctx) { return GR_VEC_CTX_OP(ctx, GENS_RECURSIVE)(res, ctx); }
diff --git a/src/gr/acb.c b/src/gr/acb.c
index 3f7cb4e08a..0a14fbd78e 100644
--- a/src/gr/acb.c
+++ b/src/gr/acb.c
@@ -1004,6 +1004,32 @@ _gr_acb_cmpabs(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx)
return _gr_acb_cmp(res, t, u, ctx);
}
+int
+_gr_acb_min(acb_t res, const acb_t x, const acb_t y, const gr_ctx_t ctx)
+{
+ if (arb_is_zero(acb_imagref(x)) && arb_is_zero(acb_imagref(y)))
+ {
+ arb_min(acb_realref(res), acb_realref(x), acb_realref(y), ACB_CTX_PREC(ctx));
+ arb_zero(acb_imagref(res));
+ return GR_SUCCESS;
+ }
+ else
+ return GR_UNABLE;
+}
+
+int
+_gr_acb_max(acb_t res, const acb_t x, const acb_t y, const gr_ctx_t ctx)
+{
+ if (arb_is_zero(acb_imagref(x)) && arb_is_zero(acb_imagref(y)))
+ {
+ arb_max(acb_realref(res), acb_realref(x), acb_realref(y), ACB_CTX_PREC(ctx));
+ arb_zero(acb_imagref(res));
+ return GR_SUCCESS;
+ }
+ else
+ return GR_UNABLE;
+}
+
int
_gr_acb_pi(acb_t res, const gr_ctx_t ctx)
{
@@ -1330,13 +1356,7 @@ _gr_acb_gamma_fmpq(acb_t res, const fmpq_t x, const gr_ctx_t ctx)
}
}
-
-int
-_gr_acb_rgamma(acb_t res, const acb_t x, const gr_ctx_t ctx)
-{
- acb_rgamma(res, x, ACB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
+DEF_FUNC(rgamma)
int
_gr_acb_lgamma(acb_t res, const acb_t x, const gr_ctx_t ctx)
@@ -1642,12 +1662,7 @@ int _gr_acb_stieltjes(acb_t res, const fmpz_t n, const acb_t a, const gr_ctx_t c
return acb_is_finite(res) ? GR_SUCCESS : GR_UNABLE;
}
-int
-_gr_acb_dirichlet_eta(acb_t res, const acb_t x, const gr_ctx_t ctx)
-{
- acb_dirichlet_eta(res, x, ACB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
+DEF_FUNC(dirichlet_eta)
/* todo
int
@@ -2220,6 +2235,8 @@ gr_method_tab_input _acb_methods_input[] =
{GR_METHOD_ARG, (gr_funcptr) _gr_acb_arg},
{GR_METHOD_CMP, (gr_funcptr) _gr_acb_cmp},
{GR_METHOD_CMPABS, (gr_funcptr) _gr_acb_cmpabs},
+ {GR_METHOD_MIN, (gr_funcptr) _gr_acb_min},
+ {GR_METHOD_MAX, (gr_funcptr) _gr_acb_max},
{GR_METHOD_PI, (gr_funcptr) _gr_acb_pi},
{GR_METHOD_EXP, (gr_funcptr) _gr_acb_exp},
{GR_METHOD_EXPM1, (gr_funcptr) _gr_acb_expm1},
diff --git a/src/gr/arb.c b/src/gr/arb.c
index 6f315dd685..cb9fb88f58 100644
--- a/src/gr/arb.c
+++ b/src/gr/arb.c
@@ -33,6 +33,55 @@ gr_arb_ctx;
#define ARB_CTX_PREC(ring_ctx) (((gr_arb_ctx *)((ring_ctx)))->prec)
+#define DEF_FUNC(fname) \
+int \
+_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \
+{ \
+ arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \
+ return GR_SUCCESS; \
+} \
+
+#define DEF_FUNC_NOPREC(fname) \
+int \
+_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \
+{ \
+ arb_ ## fname(res, x); \
+ return GR_SUCCESS; \
+} \
+
+
+#define DEF_2FUNC(fname) \
+int \
+_gr_arb_ ## fname(arb_t res1, arb_t res2, const arb_t x, const gr_ctx_t ctx) \
+{ \
+ arb_ ## fname(res1, res2, x, ARB_CTX_PREC(ctx)); \
+ return GR_SUCCESS; \
+} \
+
+#define DEF_FUNC2(fname) \
+int \
+_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \
+{ \
+ arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \
+ return GR_SUCCESS; \
+} \
+
+#define DEF_FUNC_SING(fname) \
+int \
+_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \
+{ \
+ arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \
+ return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \
+} \
+
+#define DEF_FUNC2_SING(fname) \
+int \
+_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \
+{ \
+ arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \
+ return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \
+} \
+
int _gr_arb_ctx_set_real_prec(gr_ctx_t ctx, slong prec)
{
prec = FLINT_MAX(prec, 2);
@@ -407,26 +456,13 @@ _gr_arb_equal(const arb_t x, const arb_t y, const gr_ctx_t ctx)
return T_FALSE;
}
-int
-_gr_arb_set(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_set(res, x);
- return GR_SUCCESS;
-}
-
-int
-_gr_arb_neg(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_neg(res, x);
- return GR_SUCCESS;
-}
-
-int
-_gr_arb_add(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx)
-{
- arb_add(res, x, y, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
+DEF_FUNC_NOPREC(set)
+DEF_FUNC_NOPREC(neg)
+DEF_FUNC2(add)
+DEF_FUNC2(sub)
+DEF_FUNC2(addmul)
+DEF_FUNC2(submul)
+DEF_FUNC(sqr)
int
_gr_arb_add_si(arb_t res, const arb_t x, slong y, const gr_ctx_t ctx)
@@ -449,13 +485,6 @@ _gr_arb_add_fmpz(arb_t res, const arb_t x, const fmpz_t y, const gr_ctx_t ctx)
return GR_SUCCESS;
}
-int
-_gr_arb_sub(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx)
-{
- arb_sub(res, x, y, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
int
_gr_arb_sub_si(arb_t res, const arb_t x, slong y, const gr_ctx_t ctx)
{
@@ -505,20 +534,6 @@ _gr_arb_mul_fmpz(arb_t res, const arb_t x, const fmpz_t y, const gr_ctx_t ctx)
return GR_SUCCESS;
}
-int
-_gr_arb_addmul(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx)
-{
- arb_addmul(res, x, y, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
-int
-_gr_arb_submul(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx)
-{
- arb_submul(res, x, y, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
int
_gr_arb_mul_two(arb_t res, const arb_t x, const gr_ctx_t ctx)
{
@@ -526,13 +541,6 @@ _gr_arb_mul_two(arb_t res, const arb_t x, const gr_ctx_t ctx)
return GR_SUCCESS;
}
-int
-_gr_arb_sqr(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_sqr(res, x, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
int
_gr_arb_mul_2exp_si(arb_t res, const arb_t x, slong y, const gr_ctx_t ctx)
{
@@ -810,40 +818,12 @@ _gr_arb_rsqrt(arb_t res, const arb_t x, const gr_ctx_t ctx)
}
}
-int
-_gr_arb_floor(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_floor(res, x, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
-int
-_gr_arb_ceil(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_ceil(res, x, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
-int
-_gr_arb_trunc(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_trunc(res, x, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
-int
-_gr_arb_nint(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_nint(res, x, ARB_CTX_PREC(ctx));
- return GR_SUCCESS;
-}
-
-int
-_gr_arb_abs(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_abs(res, x);
- return GR_SUCCESS;
-}
+DEF_FUNC(floor)
+DEF_FUNC(ceil)
+DEF_FUNC(trunc)
+DEF_FUNC(nint)
+DEF_FUNC_NOPREC(abs)
+DEF_FUNC_NOPREC(sgn)
int
_gr_arb_conj(arb_t res, const arb_t x, const gr_ctx_t ctx)
@@ -859,13 +839,6 @@ _gr_arb_im(arb_t res, const arb_t x, const gr_ctx_t ctx)
return GR_SUCCESS;
}
-int
-_gr_arb_sgn(arb_t res, const arb_t x, const gr_ctx_t ctx)
-{
- arb_sgn(res, x);
- return GR_SUCCESS;
-}
-
int
_gr_arb_arg(arb_t res, const arb_t x, const gr_ctx_t ctx)
{
@@ -956,47 +929,6 @@ _gr_arb_glaisher(arb_t res, const gr_ctx_t ctx)
return GR_SUCCESS;
}
-#define DEF_FUNC(fname) \
-int \
-_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \
-{ \
- arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \
- return GR_SUCCESS; \
-} \
-
-#define DEF_2FUNC(fname) \
-int \
-_gr_arb_ ## fname(arb_t res1, arb_t res2, const arb_t x, const gr_ctx_t ctx) \
-{ \
- arb_ ## fname(res1, res2, x, ARB_CTX_PREC(ctx)); \
- return GR_SUCCESS; \
-} \
-
-#define DEF_FUNC2(fname) \
-int \
-_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \
-{ \
- arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \
- return GR_SUCCESS; \
-} \
-
-#define DEF_FUNC_SING(fname) \
-int \
-_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \
-{ \
- arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \
- return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \
-} \
-
-#define DEF_FUNC2_SING(fname) \
-int \
-_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \
-{ \
- arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \
- return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \
-} \
-
-
DEF_FUNC(exp)
DEF_FUNC(expm1)
DEF_FUNC_SING(log1p)
@@ -1016,6 +948,9 @@ _gr_arb_log(arb_t res, const arb_t x, const gr_ctx_t ctx)
return GR_UNABLE;
}
+DEF_FUNC2(min)
+DEF_FUNC2(max)
+
DEF_FUNC(sin)
DEF_FUNC(cos)
DEF_2FUNC(sin_cos)
@@ -1863,6 +1798,8 @@ gr_method_tab_input _arb_methods_input[] =
{GR_METHOD_ARG, (gr_funcptr) _gr_arb_arg},
{GR_METHOD_CMP, (gr_funcptr) _gr_arb_cmp},
{GR_METHOD_CMPABS, (gr_funcptr) _gr_arb_cmpabs},
+ {GR_METHOD_MIN, (gr_funcptr) _gr_arb_min},
+ {GR_METHOD_MAX, (gr_funcptr) _gr_arb_max},
{GR_METHOD_I, (gr_funcptr) gr_not_in_domain},
{GR_METHOD_PI, (gr_funcptr) _gr_arb_pi},
{GR_METHOD_EULER, (gr_funcptr) _gr_arb_euler},
diff --git a/src/gr/matrix.c b/src/gr/matrix.c
index 9a0e6aab4c..ba1af2ad0a 100644
--- a/src/gr/matrix.c
+++ b/src/gr/matrix.c
@@ -221,9 +221,6 @@ matrix_set_other(gr_mat_t res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx)
else if (x_ctx->which_ring == GR_CTX_GR_MAT)
{
const gr_mat_struct * xmat = x;
- slong i, j;
- int status;
- slong sz, xsz;
if (res->r != xmat->r || res->c != xmat->c)
{
@@ -233,24 +230,8 @@ matrix_set_other(gr_mat_t res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx)
return GR_DOMAIN;
}
- sz = MATRIX_CTX(ctx)->base_ring->sizeof_elem;
- xsz = MATRIX_CTX(x_ctx)->base_ring->sizeof_elem;
-
- for (i = 0; i < xmat->r; i++)
- {
- for (j = 0; j < xmat->c; j++)
- {
- status = gr_set_other(GR_MAT_ENTRY(res, i, j, sz),
- GR_MAT_ENTRY(xmat, i, j, xsz),
- MATRIX_CTX(x_ctx)->base_ring,
+ return gr_mat_set_gr_mat_other(res, xmat, MATRIX_CTX(x_ctx)->base_ring,
MATRIX_CTX(ctx)->base_ring);
-
- if (status != GR_SUCCESS)
- return status;
- }
- }
-
- return GR_SUCCESS;
}
else
{
diff --git a/src/gr_generic/generic.c b/src/gr_generic/generic.c
index 7b343c7828..0908526a1f 100644
--- a/src/gr_generic/generic.c
+++ b/src/gr_generic/generic.c
@@ -1317,6 +1317,34 @@ gr_generic_cmpabs_other(int * res, gr_srcptr x, gr_srcptr y, gr_ctx_t y_ctx, gr_
return status;
}
+int
+gr_generic_min(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+{
+ int cmp;
+
+ if (gr_cmp(&cmp, x, y, ctx) != GR_SUCCESS)
+ return GR_UNABLE;
+
+ if (cmp <= 0)
+ return gr_set(res, x, ctx);
+ else
+ return gr_set(res, y, ctx);
+}
+
+int
+gr_generic_max(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx)
+{
+ int cmp;
+
+ if (gr_cmp(&cmp, x, y, ctx) != GR_SUCCESS)
+ return GR_UNABLE;
+
+ if (cmp >= 0)
+ return gr_set(res, x, ctx);
+ else
+ return gr_set(res, y, ctx);
+}
+
int
gr_generic_bernoulli_ui(gr_ptr res, ulong n, gr_ctx_t ctx)
{
@@ -2664,6 +2692,8 @@ const gr_method_tab_input _gr_generic_methods[] =
{GR_METHOD_CMPABS, (gr_funcptr) gr_generic_cmpabs},
{GR_METHOD_CMP_OTHER, (gr_funcptr) gr_generic_cmp_other},
{GR_METHOD_CMPABS_OTHER, (gr_funcptr) gr_generic_cmpabs_other},
+ {GR_METHOD_MIN, (gr_funcptr) gr_generic_min},
+ {GR_METHOD_MAX, (gr_funcptr) gr_generic_max},
{GR_METHOD_EXP, (gr_funcptr) gr_generic_exp},
{GR_METHOD_EXPM1, (gr_funcptr) gr_generic_expm1},
diff --git a/src/gr_mat.h b/src/gr_mat.h
index c8c722a4cc..68b36147f1 100644
--- a/src/gr_mat.h
+++ b/src/gr_mat.h
@@ -118,6 +118,17 @@ WARN_UNUSED_RESULT int gr_mat_set_fmpq(gr_mat_t res, const fmpq_t v, gr_ctx_t ct
WARN_UNUSED_RESULT int gr_mat_set_fmpz_mat(gr_mat_t res, const fmpz_mat_t mat, gr_ctx_t ctx);
WARN_UNUSED_RESULT int gr_mat_set_fmpq_mat(gr_mat_t res, const fmpq_mat_t mat, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_set_gr_mat_other(gr_mat_t res, const gr_mat_t mat, gr_ctx_t mat_ctx, gr_ctx_t ctx);
+
+/* fixme: needed for method typedefs */
+#ifdef GR_H
+WARN_UNUSED_RESULT int gr_mat_entrywise_unary_op(gr_mat_t res, gr_method_unary_op f, const gr_mat_t mat, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_entrywise_binary_op(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_entrywise_binary_op_scalar(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat, gr_srcptr c, gr_ctx_t ctx);
+truth_t gr_mat_entrywise_unary_predicate_all(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx);
+truth_t gr_mat_entrywise_unary_predicate_any(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx);
+truth_t gr_mat_entrywise_binary_predicate_all(gr_method_binary_predicate f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx);
+#endif
WARN_UNUSED_RESULT int gr_mat_neg(gr_mat_t res, const gr_mat_t mat, gr_ctx_t ctx);
WARN_UNUSED_RESULT int gr_mat_swap_entrywise(gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx);
@@ -279,6 +290,11 @@ WARN_UNUSED_RESULT int gr_mat_exp(gr_mat_t res, const gr_mat_t A, gr_ctx_t ctx);
WARN_UNUSED_RESULT int gr_mat_log_jordan(gr_mat_t res, const gr_mat_t A, gr_ctx_t ctx);
WARN_UNUSED_RESULT int gr_mat_log(gr_mat_t res, const gr_mat_t A, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_norm_max(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_norm_1(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_norm_inf(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_norm_frobenius(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx);
+
/* Test functions */
void gr_mat_test_mul(gr_method_mat_binary_op mul_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx);
@@ -286,6 +302,8 @@ void gr_mat_test_lu(gr_method_mat_lu_op lu_impl, flint_rand_t state, slong iters
void gr_mat_test_det(gr_method_mat_unary_op_get_scalar det_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx);
void gr_mat_test_nonsingular_solve_tril(gr_method_mat_binary_op_with_flag solve_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx);
void gr_mat_test_nonsingular_solve_triu(gr_method_mat_binary_op_with_flag solve_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx);
+void gr_mat_test_approx_mul_max_norm(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx);
+void gr_mat_test_approx_mul_pos_entrywise_accurate(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx);
#ifdef __cplusplus
}
diff --git a/src/gr_mat/entrywise.c b/src/gr_mat/entrywise.c
new file mode 100644
index 0000000000..d5d66be260
--- /dev/null
+++ b/src/gr_mat/entrywise.c
@@ -0,0 +1,148 @@
+/*
+ Copyright (C) 2024 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "gr.h"
+#include "gr_mat.h"
+
+int
+gr_mat_entrywise_unary_op(gr_mat_t res, gr_method_unary_op f, const gr_mat_t mat, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx))
+ return GR_DOMAIN;
+
+ for (i = 0; i < R; i++)
+ for (j = 0; j < C; j++)
+ status |= f(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat, i, j, sz), ctx);
+
+ return status;
+}
+
+int
+gr_mat_entrywise_binary_op(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+
+ R = gr_mat_nrows(mat1, ctx);
+ C = gr_mat_ncols(mat1, ctx);
+
+ if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx) || R != gr_mat_nrows(mat2, ctx) || C != gr_mat_ncols(mat2, ctx))
+ return GR_DOMAIN;
+
+ for (i = 0; i < R; i++)
+ for (j = 0; j < C; j++)
+ status |= f(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat1, i, j, sz), GR_MAT_ENTRY(mat2, i, j, sz), ctx);
+
+ return status;
+}
+
+int
+gr_mat_entrywise_binary_op_scalar(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat, gr_srcptr c, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx))
+ return GR_DOMAIN;
+
+ for (i = 0; i < R; i++)
+ for (j = 0; j < C; j++)
+ status |= f(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat, i, j, sz), c, ctx);
+
+ return status;
+}
+
+truth_t
+gr_mat_entrywise_unary_predicate_all(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ truth_t val, ans = T_TRUE;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ for (i = 0; i < R; i++)
+ {
+ for (j = 0; j < C; j++)
+ {
+ val = f(GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ if (val == T_FALSE)
+ return T_FALSE;
+ ans = truth_and(ans, val);
+ }
+ }
+
+ return ans;
+}
+
+truth_t
+gr_mat_entrywise_unary_predicate_any(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ truth_t val, ans = T_FALSE;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ for (i = 0; i < R; i++)
+ {
+ for (j = 0; j < C; j++)
+ {
+ val = f(GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ if (val == T_TRUE)
+ return T_TRUE;
+ ans = truth_or(ans, val);
+ }
+ }
+
+ return ans;
+}
+
+truth_t
+gr_mat_entrywise_binary_predicate_all(gr_method_binary_predicate f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ truth_t val, ans = T_TRUE;
+
+ R = gr_mat_nrows(mat1, ctx);
+ C = gr_mat_ncols(mat1, ctx);
+
+ if (R != gr_mat_nrows(mat2, ctx) || C != gr_mat_ncols(mat2, ctx))
+ return T_FALSE;
+
+ for (i = 0; i < R; i++)
+ {
+ for (j = 0; j < C; j++)
+ {
+ val = f(GR_MAT_ENTRY(mat1, i, j, sz), GR_MAT_ENTRY(mat2, i, j, sz), ctx);
+ if (val == T_FALSE)
+ return T_FALSE;
+ ans = truth_and(ans, val);
+ }
+ }
+
+ return ans;
+}
diff --git a/src/gr_mat/norm.c b/src/gr_mat/norm.c
new file mode 100644
index 0000000000..c5cc7abdad
--- /dev/null
+++ b/src/gr_mat/norm.c
@@ -0,0 +1,167 @@
+/*
+ Copyright (C) 2024 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "gr.h"
+#include "gr_mat.h"
+
+/* todo: allow overloading the following methods (or at least use
+ vector functions)
+ todo: quick bound versions */
+
+int
+gr_mat_norm_max(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+ gr_ptr t;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ if (R == 0 || C == 0)
+ return gr_zero(res, ctx);
+
+ GR_TMP_INIT(t, ctx);
+
+ for (i = 0; i < R; i++)
+ {
+ for (j = 0; j < C; j++)
+ {
+ if (i == 0 && j == 0)
+ status |= gr_abs(res, GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ else
+ {
+ status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ status |= gr_max(res, res, t, ctx);
+ }
+ }
+ }
+
+ GR_TMP_CLEAR(t, ctx);
+
+ return status;
+}
+
+int
+gr_mat_norm_1(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+ gr_ptr s, t;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ if (R == 0 || C == 0)
+ return gr_zero(res, ctx);
+
+ GR_TMP_INIT2(s, t, ctx);
+
+ for (j = 0; j < C; j++)
+ {
+ for (i = 0; i < R; i++)
+ {
+ if (i == 0)
+ status |= gr_abs(s, GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ else
+ {
+ status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ status |= gr_add(s, s, t, ctx);
+ }
+ }
+
+ if (j == 0)
+ gr_swap(res, s, ctx);
+ else
+ status |= gr_max(res, res, s, ctx);
+ }
+
+ GR_TMP_CLEAR2(s, t, ctx);
+
+ return status;
+}
+
+int
+gr_mat_norm_inf(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+ gr_ptr s, t;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ if (R == 0 || C == 0)
+ return gr_zero(res, ctx);
+
+ GR_TMP_INIT2(s, t, ctx);
+
+ for (i = 0; i < R; i++)
+ {
+ for (j = 0; j < C; j++)
+ {
+ if (j == 0)
+ status |= gr_abs(s, GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ else
+ {
+ status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ status |= gr_add(s, s, t, ctx);
+ }
+ }
+
+ if (i == 0)
+ gr_swap(res, s, ctx);
+ else
+ status |= gr_max(res, res, s, ctx);
+ }
+
+ GR_TMP_CLEAR2(s, t, ctx);
+
+ return status;
+}
+
+int
+gr_mat_norm_frobenius(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+ gr_ptr t;
+
+ R = gr_mat_nrows(mat, ctx);
+ C = gr_mat_ncols(mat, ctx);
+
+ if (R == 0 || C == 0)
+ return gr_zero(res, ctx);
+
+ GR_TMP_INIT(t, ctx);
+
+ status |= gr_zero(res, ctx);
+
+ for (i = 0; i < R; i++)
+ {
+ for (j = 0; j < C; j++)
+ {
+ status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx);
+ status |= gr_sqr(t, t, ctx);
+ status |= gr_add(res, res, t, ctx);
+ }
+ }
+
+ status |= gr_sqrt(res, res, ctx);
+
+ GR_TMP_CLEAR(t, ctx);
+
+ return status;
+}
diff --git a/src/gr_mat/randtest.c b/src/gr_mat/randtest.c
index ce30c13ad9..e4a820de4e 100644
--- a/src/gr_mat/randtest.c
+++ b/src/gr_mat/randtest.c
@@ -16,16 +16,23 @@
int
gr_mat_randtest(gr_mat_t mat, flint_rand_t state, gr_ctx_t ctx)
{
- int status;
- slong i, r, c;
+ int status = GR_SUCCESS;
+ slong i, j, r, c;
+ slong sz = ctx->sizeof_elem;
r = gr_mat_nrows(mat, ctx);
c = gr_mat_ncols(mat, ctx);
- status = GR_SUCCESS;
- for (i = 0; i < r; i++)
+ if (n_randint(state, 10) == 0)
{
- status |= _gr_vec_randtest(mat->rows[i], state, c, ctx);
+ for (i = 0; i < r; i++)
+ for (j = 0; j < c; j++)
+ status |= gr_randtest(GR_MAT_ENTRY(mat, i, j, sz), state, ctx);
+ }
+ else
+ {
+ for (i = 0; i < r; i++)
+ status |= _gr_vec_randtest(mat->rows[i], state, c, ctx);
}
return status;
diff --git a/src/gr_mat/set_gr_mat_other.c b/src/gr_mat/set_gr_mat_other.c
new file mode 100644
index 0000000000..a410ae3d8a
--- /dev/null
+++ b/src/gr_mat/set_gr_mat_other.c
@@ -0,0 +1,34 @@
+/*
+ Copyright (C) 2022 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "gr_vec.h"
+#include "gr_mat.h"
+
+int
+gr_mat_set_gr_mat_other(gr_mat_t res, const gr_mat_t mat, gr_ctx_t mat_ctx, gr_ctx_t ctx)
+{
+ slong R, C, i, j;
+ slong sz = ctx->sizeof_elem;
+ slong mat_sz = mat_ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+
+ R = gr_mat_nrows(mat, mat_ctx);
+ C = gr_mat_ncols(mat, mat_ctx);
+
+ if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx))
+ return GR_DOMAIN;
+
+ for (i = 0; i < R; i++)
+ for (j = 0; j < C && status == GR_SUCCESS; j++)
+ status |= gr_set_other(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat, i, j, mat_sz), mat_ctx, ctx);
+
+ return status;
+}
\ No newline at end of file
diff --git a/src/gr_mat/test_approx_mul.c b/src/gr_mat/test_approx_mul.c
new file mode 100644
index 0000000000..e3c3c69424
--- /dev/null
+++ b/src/gr_mat/test_approx_mul.c
@@ -0,0 +1,234 @@
+/*
+ Copyright (C) 2022, 2024 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you grn redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "gr.h"
+#include "gr_mat.h"
+
+void gr_mat_test_approx_mul_max_norm(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx)
+{
+ slong iter;
+ gr_ctx_ptr given_ctx = ctx;
+
+ for (iter = 0; iter < iters; iter++)
+ {
+ gr_mat_t A, B, C, D, ERR;
+ gr_ptr err, amag, bmag, tol;
+ slong a, b, c;
+ int status = GR_SUCCESS;
+ gr_ctx_t my_ctx;
+ gr_ctx_ptr ctx;
+
+ if (given_ctx == NULL)
+ {
+ gr_ctx_init_random(my_ctx, state);
+ ctx = my_ctx;
+ }
+ else
+ ctx = given_ctx;
+
+ if (n_randint(state, 4) == 0)
+ {
+ a = b = c = n_randint(state, maxn);
+ }
+ else
+ {
+ a = n_randint(state, maxn);
+ b = n_randint(state, maxn);
+ c = n_randint(state, maxn);
+ }
+
+ gr_mat_init(A, a, b, ctx);
+ gr_mat_init(B, b, c, ctx);
+ gr_mat_init(C, a, c, ctx);
+ gr_mat_init(D, a, c, ctx);
+ gr_mat_init(ERR, a, c, ctx);
+ err = gr_heap_init(ctx);
+ amag = gr_heap_init(ctx);
+ bmag = gr_heap_init(ctx);
+ tol = gr_heap_init(ctx);
+
+ status |= gr_mat_randtest(A, state, ctx);
+ status |= gr_mat_randtest(B, state, ctx);
+ status |= gr_mat_entrywise_unary_op(A, (gr_method_unary_op) gr_abs, A, ctx);
+ status |= gr_mat_entrywise_unary_op(B, (gr_method_unary_op) gr_abs, B, ctx);
+
+ status |= gr_mat_randtest(C, state, ctx);
+ status |= gr_mat_randtest(D, state, ctx);
+
+ if (b == c && n_randint(state, 2))
+ {
+ status |= gr_mat_set(C, A, ctx);
+ status |= mul_impl(C, C, B, ctx);
+ }
+ else if (a == b && n_randint(state, 2))
+ {
+ status |= gr_mat_set(C, B, ctx);
+ status |= mul_impl(C, A, C, ctx);
+ }
+ else if (a == b && b == c && n_randint(state, 2))
+ {
+ status |= gr_mat_set(B, A, ctx);
+ status |= mul_impl(C, A, A, ctx);
+ }
+ else if (a == b && b == c && n_randint(state, 2))
+ {
+ status |= gr_mat_set(B, A, ctx);
+ status |= gr_mat_set(C, A, ctx);
+ status |= mul_impl(C, C, C, ctx);
+ }
+ else
+ {
+ status |= mul_impl(C, A, B, ctx);
+ }
+
+ status |= gr_mat_mul_classical(D, A, B, ctx);
+
+ status |= gr_mat_sub(ERR, C, D, ctx);
+
+ status |= gr_mat_norm_max(err, ERR, ctx);
+ status |= gr_mat_norm_max(amag, A, ctx);
+ status |= gr_mat_norm_max(bmag, B, ctx);
+ status |= gr_mul(tol, amag, bmag, ctx);
+ status |= gr_mul(tol, tol, rel_tol, ctx);
+
+ if (status == GR_SUCCESS && gr_le(err, tol, ctx) == T_FALSE)
+ {
+ flint_printf("FAIL:\n");
+ gr_ctx_println(ctx);
+ flint_printf("A:\n"); gr_mat_print(A, ctx); flint_printf("\n\n");
+ flint_printf("B:\n"); gr_mat_print(B, ctx); flint_printf("\n\n");
+ flint_printf("C:\n"); gr_mat_print(C, ctx); flint_printf("\n\n");
+ flint_printf("D:\n"); gr_mat_print(D, ctx); flint_printf("\n\n");
+ flint_printf("ERR:\n"); gr_mat_print(ERR, ctx); flint_printf("\n\n");
+ flint_printf("err:\n"); gr_println(err, ctx); flint_printf("\n\n");
+ flint_printf("tol:\n"); gr_println(tol, ctx); flint_printf("\n\n");
+ flint_abort();
+ }
+
+ gr_mat_clear(A, ctx);
+ gr_mat_clear(B, ctx);
+ gr_mat_clear(C, ctx);
+ gr_mat_clear(D, ctx);
+ gr_heap_clear(err, ctx);
+ gr_heap_clear(amag, ctx);
+ gr_heap_clear(bmag, ctx);
+ gr_heap_clear(tol, ctx);
+
+ if (given_ctx == NULL)
+ gr_ctx_clear(ctx);
+ }
+}
+
+void gr_mat_test_approx_mul_pos_entrywise_accurate(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx)
+{
+ slong iter;
+ gr_ctx_ptr given_ctx = ctx;
+
+ for (iter = 0; iter < iters; iter++)
+ {
+ gr_mat_t A, B, C, D, ERR, TOL;
+ slong a, b, c;
+ int status = GR_SUCCESS;
+ gr_ctx_t my_ctx;
+ gr_ctx_ptr ctx;
+
+ if (given_ctx == NULL)
+ {
+ gr_ctx_init_random(my_ctx, state);
+ ctx = my_ctx;
+ }
+ else
+ ctx = given_ctx;
+
+ if (n_randint(state, 4) == 0)
+ {
+ a = b = c = n_randint(state, maxn);
+ }
+ else
+ {
+ a = n_randint(state, maxn);
+ b = n_randint(state, maxn);
+ c = n_randint(state, maxn);
+ }
+
+ gr_mat_init(A, a, b, ctx);
+ gr_mat_init(B, b, c, ctx);
+ gr_mat_init(C, a, c, ctx);
+ gr_mat_init(D, a, c, ctx);
+ gr_mat_init(ERR, a, c, ctx);
+ gr_mat_init(TOL, a, c, ctx);
+
+ status |= gr_mat_randtest(A, state, ctx);
+ status |= gr_mat_randtest(B, state, ctx);
+ status |= gr_mat_entrywise_unary_op(A, (gr_method_unary_op) gr_abs, A, ctx);
+ status |= gr_mat_entrywise_unary_op(B, (gr_method_unary_op) gr_abs, B, ctx);
+
+ status |= gr_mat_randtest(C, state, ctx);
+ status |= gr_mat_randtest(D, state, ctx);
+
+ if (b == c && n_randint(state, 2))
+ {
+ status |= gr_mat_set(C, A, ctx);
+ status |= mul_impl(C, C, B, ctx);
+ }
+ else if (a == b && n_randint(state, 2))
+ {
+ status |= gr_mat_set(C, B, ctx);
+ status |= mul_impl(C, A, C, ctx);
+ }
+ else if (a == b && b == c && n_randint(state, 2))
+ {
+ status |= gr_mat_set(B, A, ctx);
+ status |= mul_impl(C, A, A, ctx);
+ }
+ else if (a == b && b == c && n_randint(state, 2))
+ {
+ status |= gr_mat_set(B, A, ctx);
+ status |= gr_mat_set(C, A, ctx);
+ status |= mul_impl(C, C, C, ctx);
+ }
+ else
+ {
+ status |= mul_impl(C, A, B, ctx);
+ }
+
+ status |= gr_mat_mul_classical(D, A, B, ctx);
+
+ /* |C-D| <= |D| tol */
+ status |= gr_mat_sub(ERR, C, D, ctx);
+ status |= gr_mat_entrywise_unary_op(ERR, (gr_method_unary_op) gr_abs, ERR, ctx);
+ status |= gr_mat_entrywise_unary_op(TOL, (gr_method_unary_op) gr_abs, D, ctx);
+ status |= gr_mat_mul_scalar(TOL, TOL, rel_tol, ctx);
+
+ if (status == GR_SUCCESS && gr_mat_entrywise_binary_predicate_all((gr_method_binary_predicate) gr_le, ERR, TOL, ctx) == T_FALSE)
+ {
+ flint_printf("FAIL:\n");
+ gr_ctx_println(ctx);
+ flint_printf("A:\n"); gr_mat_print(A, ctx); flint_printf("\n\n");
+ flint_printf("B:\n"); gr_mat_print(B, ctx); flint_printf("\n\n");
+ flint_printf("C:\n"); gr_mat_print(C, ctx); flint_printf("\n\n");
+ flint_printf("D:\n"); gr_mat_print(D, ctx); flint_printf("\n\n");
+ flint_printf("ERR:\n"); gr_mat_print(ERR, ctx); flint_printf("\n\n");
+ flint_printf("TOL:\n"); gr_mat_print(TOL, ctx); flint_printf("\n\n");
+ flint_abort();
+ }
+
+ gr_mat_clear(A, ctx);
+ gr_mat_clear(B, ctx);
+ gr_mat_clear(C, ctx);
+ gr_mat_clear(D, ctx);
+ gr_mat_clear(ERR, ctx);
+ gr_mat_clear(TOL, ctx);
+
+ if (given_ctx == NULL)
+ gr_ctx_clear(ctx);
+ }
+}
diff --git a/src/nfloat.h b/src/nfloat.h
index 45a16d3eee..78b764a213 100644
--- a/src/nfloat.h
+++ b/src/nfloat.h
@@ -453,6 +453,11 @@ int _nfloat_vec_submul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat
int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
+int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx);
+int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+
/* Complex numbers */
/* Note: we use the same context data for real and complex rings
(only which_ring and sizeof_elem differ). This allows us to call
diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c
index 250f15bf8d..6178b877cc 100644
--- a/src/nfloat/ctx.c
+++ b/src/nfloat/ctx.c
@@ -173,8 +173,8 @@ gr_method_tab_input _nfloat_methods_input[] =
/*
{GR_METHOD_POLY_MULLOW, (gr_funcptr) nfloat_poly_mullow},
{GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_poly_roots_other},
- {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_mat_mul},
*/
+ {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_mat_mul},
{GR_METHOD_MAT_DET, (gr_funcptr) gr_mat_det_generic_field},
{GR_METHOD_MAT_FIND_NONZERO_PIVOT, (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs},
diff --git a/src/nfloat/mat_mul.c b/src/nfloat/mat_mul.c
new file mode 100644
index 0000000000..169722a3ec
--- /dev/null
+++ b/src/nfloat/mat_mul.c
@@ -0,0 +1,931 @@
+/*
+ Copyright (C) 2024 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "mpn_extras.h"
+#include "gr.h"
+#include "gr_mat.h"
+#include "gr_generic.h"
+#include "acf.h"
+#include "acb.h"
+#include "nfloat.h"
+
+#include "gr.h"
+#include "nfloat.h"
+#include "gr_vec.h"
+#include "gr_mat.h"
+#include "gr_special.h"
+#include "fmpz_mat.h"
+
+
+int
+nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+
+
+/* For printing */
+#include "arf.h"
+
+/* Arithmetic on fixed-point numbers in (-1,1) */
+/* x[0] stores the sign bit, x[1], ..., x[n] store the absolute value */
+
+void
+nfixed_print(nn_srcptr x, slong nlimbs, slong exp)
+{
+ arf_t t;
+ arf_init(t);
+ _arf_set_mpn_fixed(t, x + 1, nlimbs, nlimbs, x[0], nlimbs * FLINT_BITS, ARF_RND_DOWN);
+ arf_mul_2exp_si(t, t, exp);
+ arf_printd(t, nlimbs * FLINT_BITS / 3.321928 + 1);
+ arf_clear(t);
+}
+
+
+/* todo: don't do this */
+#define NFIXED_MAX_NLIMBS (2 * NFLOAT_MAX_LIMBS)
+
+FLINT_FORCE_INLINE
+void nfixed_add(nn_ptr res, nn_srcptr a, nn_srcptr b, slong nlimbs)
+{
+ int asgn, bsgn;
+ asgn = a[0];
+ bsgn = b[0];
+
+ if (asgn == bsgn)
+ {
+ res[0] = asgn;
+ mpn_add_n(res + 1, a + 1, b + 1, nlimbs);
+ }
+ else
+ {
+ res[0] = asgn ^ flint_mpn_signed_sub_n(res + 1, a + 1, b + 1, nlimbs);
+ }
+}
+
+FLINT_FORCE_INLINE
+void nfixed_sub(nn_ptr res, nn_srcptr a, nn_srcptr b, slong nlimbs)
+{
+ int asgn, bsgn;
+ asgn = a[0];
+ bsgn = b[0];
+
+ if (asgn != bsgn)
+ {
+ res[0] = asgn;
+ mpn_add_n(res + 1, a + 1, b + 1, nlimbs);
+ }
+ else
+ {
+ res[0] = asgn ^ flint_mpn_signed_sub_n(res + 1, a + 1, b + 1, nlimbs);
+ }
+}
+
+FLINT_FORCE_INLINE
+void nfixed_mul(nn_ptr res, nn_srcptr a, nn_srcptr b, slong nlimbs)
+{
+ int asgn, bsgn;
+ asgn = a[0];
+ bsgn = b[0];
+ res[0] = asgn ^ bsgn;
+ flint_mpn_mulhigh_n(res + 1, a + 1, b + 1, nlimbs);
+}
+
+FLINT_FORCE_INLINE
+void nfixed_sqr(nn_ptr res, nn_srcptr a, slong nlimbs)
+{
+ res[0] = 0;
+ flint_mpn_sqrhigh(res + 1, a + 1, nlimbs);
+}
+
+FLINT_FORCE_INLINE
+void nfixed_div2(nn_ptr res, nn_srcptr a, slong nlimbs)
+{
+ res[0] = a[0];
+ mpn_rshift(res + 1, a + 1, nlimbs, 1);
+}
+
+/* A is (m x n), B is (n x p), C is (m x p) */
+void
+_nfixed_mat_mul_classical(nn_ptr C, nn_srcptr A, nn_srcptr B, slong m, slong n, slong p, slong nlimbs)
+{
+ slong i, j, k;
+ nn_ptr t;
+ TMP_INIT;
+
+ TMP_START;
+
+ t = TMP_ALLOC((nlimbs + 1) * sizeof(ulong));
+
+#define A_ENTRY(i, j) ((A) + ((i) * n + (j)) * (nlimbs + 1))
+#define B_ENTRY(i, j) ((B) + ((i) * p + (j)) * (nlimbs + 1))
+#define C_ENTRY(i, j) ((C) + ((i) * p + (j)) * (nlimbs + 1))
+
+ for (i = 0; i < m; i++)
+ {
+ for (j = 0; j < p; j++)
+ {
+ nfixed_mul(C_ENTRY(i, j), A_ENTRY(i, 0), B_ENTRY(0, j), nlimbs);
+
+ for (k = 1; k < n; k++)
+ {
+ nfixed_mul(t, A_ENTRY(i, k), B_ENTRY(k, j), nlimbs);
+ nfixed_add(C_ENTRY(i, j), C_ENTRY(i, j), t, nlimbs);
+ }
+ }
+ }
+
+ TMP_END;
+
+#undef A_ENTRY
+#undef B_ENTRY
+#undef C_ENTRY
+}
+
+/* compute c += (a1 + b1) * (a2 + b2) */
+/* val0, val1, val2 are scratch space */
+FLINT_FORCE_INLINE void
+addmul_addadd(nn_ptr val0, nn_ptr val1, nn_ptr val2, nn_ptr c, nn_srcptr a1, nn_srcptr b1, nn_srcptr a2, nn_srcptr b2, slong nlimbs)
+{
+ nfixed_add(val1, a1, b1, nlimbs);
+ nfixed_add(val2, a2, b2, nlimbs);
+ nfixed_mul(val0, val1, val2, nlimbs);
+ nfixed_add(c, c, val0, nlimbs);
+}
+
+/* compute c += (a1 - b1) * (a2 - b2) */
+/* val0, val1, val2 are scratch space */
+FLINT_FORCE_INLINE void
+addmul_subsub(nn_ptr val0, nn_ptr val1, nn_ptr val2, nn_ptr c, nn_srcptr a1, nn_srcptr b1, nn_srcptr a2, nn_srcptr b2, slong nlimbs)
+{
+ nfixed_sub(val1, a1, b1, nlimbs);
+ nfixed_sub(val2, a2, b2, nlimbs);
+ nfixed_mul(val0, val1, val2, nlimbs);
+ nfixed_add(c, c, val0, nlimbs);
+}
+
+void
+_nfixed_mat_mul_waksman(nn_ptr C, nn_srcptr A, nn_srcptr B, slong m, slong n, slong p, slong nlimbs)
+{
+ slong l, j, k;
+
+ nn_ptr Ctmp = flint_calloc((nlimbs + 1) * ((p + m) + 5), sizeof(ulong));
+ /* Ctmp itself has m * p entries */
+ nn_ptr Crow = Ctmp; /* Crow has p entries */
+ nn_ptr Ccol = Crow + (nlimbs + 1) * p; /* Ccol has m entries */
+ nn_ptr val0 = Ccol + (nlimbs + 1) * m; /* val0 has room for 2 sums */
+ nn_ptr val1 = val0 + (nlimbs + 1) * 2; /* val1 has room for 1 sum */
+ nn_ptr val2 = val1 + (nlimbs + 1); /* val2 has room for 1 sum */
+ nn_ptr crow = val2 + (nlimbs + 1); /* crow has room for 1 sum */
+
+#define A_ENTRY(i, j) ((A) + ((i) * n + (j)) * (nlimbs + 1))
+#define B_ENTRY(i, j) ((B) + ((i) * p + (j)) * (nlimbs + 1))
+#define C_ENTRY(i, j) ((C) + ((i) * p + (j)) * (nlimbs + 1))
+
+#define Crow_ENTRY(ii) (Crow + (ii) * (nlimbs + 1))
+#define Ccol_ENTRY(ii) (Ccol + (ii) * (nlimbs + 1))
+
+ slong np = n >> 1;
+
+ for (j = 1; j <= np; j++)
+ {
+ slong j2 = (j << 1) - 1;
+
+ for (k = 0; k < p; k++)
+ {
+ addmul_addadd(val0, val1, val2, C_ENTRY(0, k), A_ENTRY(0, j2-1), B_ENTRY(j2, k), A_ENTRY(0, j2), B_ENTRY(j2-1, k), nlimbs);
+ addmul_subsub(val0, val1, val2, Crow_ENTRY(k), A_ENTRY(0, j2-1), B_ENTRY(j2, k), A_ENTRY(0, j2), B_ENTRY(j2-1, k), nlimbs);
+ }
+
+ for (l = 1; l < m; l++)
+ {
+ addmul_addadd(val0, val1, val2, C_ENTRY(l, 0), A_ENTRY(l, j2-1), B_ENTRY(j2, 0), A_ENTRY(l, j2), B_ENTRY(j2-1, 0), nlimbs);
+ addmul_subsub(val0, val1, val2, Ccol_ENTRY(l), A_ENTRY(l, j2-1), B_ENTRY(j2, 0), A_ENTRY(l, j2), B_ENTRY(j2-1, 0), nlimbs);
+ }
+
+ for (k = 1; k < p; k++)
+ {
+ for (l = 1; l < m; l++)
+ {
+ addmul_addadd(val0, val1, val2, C_ENTRY(l, k), A_ENTRY(l, j2-1), B_ENTRY(j2, k), A_ENTRY(l, j2), B_ENTRY(j2-1, k), nlimbs);
+ }
+ }
+ }
+
+ for (l = 1; l < m; l++)
+ {
+ nfixed_add(val1, Ccol_ENTRY(l), C_ENTRY(l, 0), nlimbs);
+ nfixed_div2(Ccol_ENTRY(l), val1, nlimbs);
+ nfixed_sub(C_ENTRY(l, 0), C_ENTRY(l, 0), Ccol_ENTRY(l), nlimbs);
+ }
+
+ nfixed_add(val1, Crow, C_ENTRY(0, 0), nlimbs);
+ nfixed_div2(val0, val1, nlimbs);
+ nfixed_sub(C_ENTRY(0, 0), C_ENTRY(0, 0), val0, nlimbs);
+
+ for (k = 1; k < p; k++)
+ {
+ nfixed_add(crow, Crow_ENTRY(k), C_ENTRY(0, k), nlimbs);
+ nfixed_div2(val1, crow, nlimbs);
+ nfixed_sub(C_ENTRY(0, k), C_ENTRY(0, k), val1, nlimbs);
+ nfixed_sub(crow, val1, val0, nlimbs);
+
+ for (l = 1; l < m; l++)
+ {
+ nfixed_sub(val2, C_ENTRY(l, k), crow, nlimbs);
+ nfixed_sub(C_ENTRY(l, k), val2, Ccol_ENTRY(l), nlimbs);
+ }
+ }
+
+ if ((n & 1) == 1)
+ {
+ for (l = 0; l < m; l++)
+ {
+ for (k = 0; k < p; k++)
+ {
+ nfixed_mul(val0, A_ENTRY(l, n-1), B_ENTRY(n-1, k), nlimbs);
+ nfixed_add(C_ENTRY(l, k), C_ENTRY(l, k), val0, nlimbs);
+ }
+ }
+ }
+
+ flint_free(Ctmp);
+
+#undef A_ENTRY
+#undef B_ENTRY
+#undef C_ENTRY
+}
+
+FLINT_FORCE_INLINE void
+_nfloat_get_nfixed(nn_ptr res, nn_srcptr x, slong exp, slong fix_nlimbs, gr_ctx_t ctx)
+{
+ slong rel_exp;
+
+ /* assumes res is already zeroed */
+ if (NFLOAT_IS_ZERO(x))
+ return;
+
+ rel_exp = NFLOAT_EXP(x) - exp;
+ if (rel_exp >= 0)
+ flint_abort();
+
+ res[0] = NFLOAT_SGNBIT(x);
+ _arf_get_integer_mpn(res + 1, NFLOAT_D(x), NFLOAT_CTX_NLIMBS(ctx), fix_nlimbs * FLINT_BITS + rel_exp);
+}
+
+FLINT_FORCE_INLINE int
+_nfloat_set_nfixed(nn_ptr res, nn_srcptr x, slong exp, slong fix_nlimbs, gr_ctx_t ctx)
+{
+ return nfloat_set_mpn_2exp(res, x + 1, fix_nlimbs, exp, x[0], ctx);
+}
+
+static void
+_nfloat_mat_exp_range(slong * _Amin, slong * _Amax, const gr_mat_t A, gr_ctx_t ctx)
+{
+ slong Amax, Amin;
+ slong m = A->r;
+ slong n = A->c;
+ slong exp, i, j;
+ slong sz = ctx->sizeof_elem;
+
+ Amax = WORD_MIN;
+ Amin = WORD_MAX;
+
+ for (i = 0; i < m; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ exp = NFLOAT_EXP(GR_MAT_ENTRY(A, i, j, sz));
+ Amax = FLINT_MAX(Amax, exp);
+ Amin = FLINT_MIN(Amin, exp);
+ }
+ }
+
+ _Amin[0] = Amin;
+ _Amax[0] = Amax;
+}
+
+int
+_nfloat_mat_mul_fixed_given_exp(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong Aexp, slong Bexp, slong fnlimbs, int waksman, gr_ctx_t ctx)
+{
+ nn_ptr T, TA, TB, TC;
+ slong i, j;
+ slong sz = ctx->sizeof_elem;
+ slong fdnlimbs;
+
+ slong m = A->r;
+ slong n = A->c;
+ slong p = B->c;
+
+ /* limbs including sign limb */
+ fdnlimbs = fnlimbs + 1;
+
+ T = flint_calloc(fdnlimbs * (m * n + n * p + m * p), sizeof(ulong));
+
+ TA = T;
+ TB = TA + fdnlimbs * (m * n);
+ TC = TB + fdnlimbs * (n * p);
+
+ for (i = 0; i < m; i++)
+ for (j = 0; j < n; j++)
+ _nfloat_get_nfixed(TA + i * fdnlimbs * n + j * fdnlimbs, GR_MAT_ENTRY(A, i, j, sz), Aexp, fnlimbs, ctx);
+
+ for (i = 0; i < n; i++)
+ for (j = 0; j < p; j++)
+ _nfloat_get_nfixed(TB + i * fdnlimbs * p + j * fdnlimbs, GR_MAT_ENTRY(B, i, j, sz), Bexp, fnlimbs, ctx);
+
+ if (waksman)
+ _nfixed_mat_mul_waksman(TC, TA, TB, m, n, p, fnlimbs);
+ else
+ _nfixed_mat_mul_classical(TC, TA, TB, m, n, p, fnlimbs);
+
+ for (i = 0; i < m; i++)
+ for (j = 0; j < p; j++)
+ _nfloat_set_nfixed(GR_MAT_ENTRY(C, i, j, sz), TC + i * fdnlimbs * p + j * fdnlimbs, Aexp + Bexp, fnlimbs, ctx);
+
+ flint_free(T);
+
+ return GR_SUCCESS;
+}
+
+int
+_nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksman, slong max_extra_bits, gr_ctx_t ctx)
+{
+ slong Amax, Amin, Bmax, Bmin, Adelta, Bdelta, Aexp, Bexp;
+ slong prec;
+ slong pad_top, pad_bot, extra_bits, fbits, fnlimbs;
+ slong n = A->c;
+
+ if (NFLOAT_CTX_HAS_INF_NAN(ctx))
+ return GR_UNABLE;
+
+ prec = NFLOAT_CTX_PREC(ctx);
+
+ _nfloat_mat_exp_range(&Amin, &Amax, A, ctx);
+ _nfloat_mat_exp_range(&Bmin, &Bmax, B, ctx);
+
+ if (Amax < NFLOAT_MIN_EXP || Bmax < NFLOAT_MIN_EXP)
+ return gr_mat_zero(C, ctx);
+
+ /* Currently, we don't handle zeros. (They pose no problem, but zero entries in
+ the output may not be exact. To be done.) */
+ if (Amin < NFLOAT_MIN_EXP || Bmin < NFLOAT_MIN_EXP)
+ return gr_mat_mul_classical(C, A, B, ctx);
+
+ Adelta = Amax - Amin;
+ Bdelta = Bmax - Bmin;
+
+ /* sanity check */
+ if (Adelta > 10 * prec || Bdelta > 10 * prec)
+ return gr_mat_mul_classical(C, A, B, ctx);
+
+ /*
+ To double check: for Waksman,
+ * The intermediate entries are bounded by 8n max(|A|,|B|)^2.
+ * The error, including error from converting
+ the input matrices, is bounded by 8n ulps.
+ */
+
+ pad_top = 3 + FLINT_BIT_COUNT(n);
+ pad_bot = 3 + FLINT_BIT_COUNT(n);
+
+ extra_bits = Adelta + Bdelta + pad_top + pad_bot;
+
+ if (extra_bits >= max_extra_bits)
+ return gr_mat_mul_classical(C, A, B, ctx);
+
+ Aexp = Amax + pad_top;
+ Bexp = Bmax + pad_top;
+ fbits = prec + extra_bits;
+ fnlimbs = (fbits + FLINT_BITS - 1) / FLINT_BITS;
+
+ return _nfloat_mat_mul_fixed_given_exp(C, A, B, Aexp, Bexp, fnlimbs, waksman, ctx);
+}
+
+int
+nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+{
+ return _nfloat_mat_mul_fixed(C, A, B, 0, 100000, ctx);
+}
+
+int
+nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+{
+ return _nfloat_mat_mul_fixed(C, A, B, 1, 100000, ctx);
+}
+
+
+static void
+_nfloat_2exp_get_fmpz(fmpz_t res, nfloat_srcptr x, slong fixexp, gr_ctx_t ctx)
+{
+ slong exp, zn;
+ mpz_ptr zz;
+ nn_ptr zp;
+ int negative;
+ slong nlimbs = NFLOAT_CTX_NLIMBS(ctx);
+
+ if (NFLOAT_IS_SPECIAL(x))
+ {
+ fmpz_zero(res);
+ return;
+ }
+
+ exp = NFLOAT_EXP(x) - fixexp;
+
+ if (exp <= 0)
+ {
+ fmpz_zero(res);
+ return;
+ }
+
+ /* todo: small case */
+
+ negative = NFLOAT_SGNBIT(x);
+
+ zn = (exp + FLINT_BITS - 1) / FLINT_BITS;
+ zz = _fmpz_promote(res);
+ zp = FLINT_MPZ_REALLOC(zz, zn);
+ _arf_get_integer_mpn(zp, NFLOAT_D(x), nlimbs, exp);
+ zz->_mp_size = negative ? -zn : zn;
+ _fmpz_demote_val(res);
+}
+
+int
+nfloat_mat_addmul_block_fallback(gr_mat_t C,
+ const gr_mat_t A, const gr_mat_t B,
+ slong block_start,
+ slong block_end,
+ gr_ctx_t ctx)
+{
+ slong M, P, n;
+ slong i, j, sz;
+ nn_ptr tmpB;
+ slong ndlimbs = NFLOAT_CTX_DATA_NLIMBS(ctx);
+ sz = ctx->sizeof_elem;
+ int status = GR_SUCCESS;
+
+ M = A->r;
+ P = B->c;
+
+ n = block_end - block_start;
+
+ tmpB = flint_malloc(sizeof(ulong) * ndlimbs * (P * n));
+
+#define AA(ii, jj) GR_MAT_ENTRY(A, ii, block_start + (jj), sz)
+
+ for (i = 0; i < P; i++)
+ for (j = 0; j < n; j++)
+ flint_mpn_copyi(tmpB + (i * n + j) * ndlimbs, GR_MAT_ENTRY(B, block_start + j, i, sz), ndlimbs);
+
+ for (i = 0; i < M; i++)
+ {
+ for (j = 0; j < P; j++)
+ {
+ status |= _nfloat_vec_dot(GR_MAT_ENTRY(C, i, j, sz),
+ (block_start == 0) ? NULL : GR_MAT_ENTRY(C, i, j, sz), 0,
+ GR_MAT_ENTRY(A, i, block_start, sz),
+ tmpB + j * n * ndlimbs, n, ctx);
+ }
+ }
+
+ flint_free(tmpB);
+
+ return status;
+}
+
+int
+nfloat_mat_addmul_block_prescaled(gr_mat_t C,
+ const gr_mat_t A, const gr_mat_t B,
+ slong block_start,
+ slong block_end,
+ const slong * A_min, /* A per-row bottom exponent */
+ const slong * B_min, /* B per-row bottom exponent */
+ gr_ctx_t ctx)
+{
+ slong M, P, n;
+ slong i, j;
+ slong M0, M1, P0, P1, Mstep, Pstep;
+ int status = GR_SUCCESS;
+ slong sz = ctx->sizeof_elem;
+ ulong t[NFLOAT_MAX_ALLOC];
+ slong e;
+
+ M = A->r;
+ P = B->c;
+
+ n = block_end - block_start;
+
+ /* Create sub-blocks to keep matrices nearly square. Necessary? */
+#if 1
+ Mstep = (M < 2 * n) ? M : n;
+ Pstep = (P < 2 * n) ? P : n;
+#else
+ Mstep = M;
+ Pstep = P;
+#endif
+
+ for (M0 = 0; M0 < M; M0 += Mstep)
+ {
+ for (P0 = 0; P0 < P; P0 += Pstep)
+ {
+ fmpz_mat_t AA, BB, CC;
+
+ M1 = FLINT_MIN(M0 + Mstep, M);
+ P1 = FLINT_MIN(P0 + Pstep, P);
+
+ fmpz_mat_init(AA, M1 - M0, n);
+ fmpz_mat_init(BB, n, P1 - P0);
+ fmpz_mat_init(CC, M1 - M0, P1 - P0);
+
+ /* Convert to fixed-point matrices. */
+ for (i = M0; i < M1; i++)
+ {
+ if (A_min[i] == WORD_MIN) /* only zeros in this row */
+ continue;
+
+ for (j = 0; j < n; j++)
+ _nfloat_2exp_get_fmpz(fmpz_mat_entry(AA, i - M0, j), GR_MAT_ENTRY(A, i, block_start + j, sz), A_min[i], ctx);
+ }
+
+ for (i = P0; i < P1; i++)
+ {
+ if (B_min[i] == WORD_MIN) /* only zeros in this column */
+ continue;
+
+ for (j = 0; j < n; j++)
+ _nfloat_2exp_get_fmpz(fmpz_mat_entry(BB, j, i - P0), GR_MAT_ENTRY(B, block_start + j, i, sz), B_min[i], ctx);
+ }
+
+ /* The main multiplication */
+ fmpz_mat_mul(CC, AA, BB);
+ fmpz_mat_clear(AA);
+ fmpz_mat_clear(BB);
+
+ /* Add to the result matrix */
+ for (i = M0; i < M1; i++)
+ {
+ for (j = P0; j < P1; j++)
+ {
+ e = A_min[i] + B_min[j];
+
+ /* The first time we write this Cij */
+ if (block_start == 0)
+ {
+ status |= nfloat_set_fmpz(GR_MAT_ENTRY(C, i, j, sz), fmpz_mat_entry(CC, i - M0, j - P0), ctx);
+ status |= nfloat_mul_2exp_si(GR_MAT_ENTRY(C, i, j, sz), GR_MAT_ENTRY(C, i, j, sz), e, ctx);
+ }
+ else
+ {
+ status |= nfloat_set_fmpz(t, fmpz_mat_entry(CC, i - M0, j - P0), ctx);
+ status |= nfloat_mul_2exp_si(t, t, e, ctx);
+ status |= nfloat_add(GR_MAT_ENTRY(C, i, j, sz), GR_MAT_ENTRY(C, i, j, sz), t, ctx);
+ }
+ }
+ }
+
+ fmpz_mat_clear(CC);
+ }
+ }
+
+ return status;
+}
+
+FLINT_FORCE_INLINE slong
+_nfloat_nbits(nfloat_srcptr x, slong nlimbs)
+{
+ nn_srcptr ad;
+ slong bits;
+
+ ad = NFLOAT_D(x);
+ bits = FLINT_BITS * nlimbs;
+
+ while (ad[0] == 0)
+ {
+ bits -= FLINT_BITS;
+ ad++;
+ }
+
+ bits -= flint_ctz(ad[0]);
+
+ return bits;
+}
+
+/* todo: squaring optimizations */
+int
+nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx)
+{
+ slong M, N, P;
+ slong *A_min, *A_max, *B_min, *B_max;
+ short *A_bits, *B_bits;
+ slong *A_bot, *B_bot;
+ slong block_start, block_end, i, j, bot, top, max_height;
+ slong b, A_max_bits, B_max_bits;
+ nfloat_srcptr t;
+ double A_density, B_density;
+ slong sz = ctx->sizeof_elem;
+ slong nlimbs = NFLOAT_CTX_NLIMBS(ctx);
+ slong prec = NFLOAT_CTX_PREC(ctx);
+ int status = GR_SUCCESS;
+
+ M = A->r;
+ N = A->c;
+ P = B->r;
+
+ if (N != B->r || M != C->r || P != C->c)
+ return GR_DOMAIN;
+
+ if (M == 0 || N == 0 || P == 0)
+ return gr_mat_zero(C, ctx);
+
+ if (NFLOAT_CTX_HAS_INF_NAN(ctx))
+ return GR_UNABLE;
+
+ if (A == C || B == C)
+ {
+ gr_mat_t T;
+ gr_mat_init(T, M, P, ctx);
+ status = nfloat_mat_mul_block(T, A, B, min_block_size, ctx);
+ status |= gr_mat_swap_entrywise(T, C, ctx);
+ gr_mat_clear(T, ctx);
+ return status;
+ }
+
+ /* bottom exponents of A */
+ A_bot = flint_malloc(sizeof(slong) * M * N);
+ /* minimum bottom exponent in current row */
+ A_min = flint_malloc(sizeof(slong) * M);
+ /* maximum top exponent in current row */
+ A_max = flint_malloc(sizeof(slong) * M);
+
+ B_bot = flint_malloc(sizeof(slong) * N * P);
+ B_min = flint_malloc(sizeof(slong) * P);
+ B_max = flint_malloc(sizeof(slong) * P);
+
+ /* save space using shorts to store the bit sizes temporarily;
+ the block algorithm will not be used at extremely high precision */
+ A_bits = flint_malloc(sizeof(short) * M * N);
+ B_bits = flint_malloc(sizeof(short) * N * P);
+
+ A_max_bits = B_max_bits = 0;
+ A_density = B_density = 0;
+
+ /* Build table of bottom exponents (WORD_MIN signifies a zero),
+ and also collect some statistics. */
+ for (i = 0; i < M; i++)
+ {
+ for (j = 0; j < N; j++)
+ {
+ t = GR_MAT_ENTRY(A, i, j, sz);
+ if (NFLOAT_IS_ZERO(t))
+ {
+ A_bot[i * N + j] = WORD_MIN;
+ A_bits[i * N + j] = 0;
+ }
+ else
+ {
+ b = _nfloat_nbits(t, nlimbs);
+ A_bot[i * N + j] = NFLOAT_EXP(t) - b;
+ A_bits[i * N + j] = b;
+ A_max_bits = FLINT_MAX(A_max_bits, b);
+ A_density++;
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ for (j = 0; j < P; j++)
+ {
+ t = GR_MAT_ENTRY(B, i, j, sz);
+ if (NFLOAT_IS_ZERO(t))
+ {
+ B_bot[i * P + j] = WORD_MIN;
+ B_bits[i * P + j] = 0;
+ }
+ else
+ {
+ b = _nfloat_nbits(t, nlimbs);
+ B_bot[i * P + j] = NFLOAT_EXP(t) - b;
+ B_bits[i * P + j] = b;
+ B_max_bits = FLINT_MAX(B_max_bits, b);
+ B_density++;
+ }
+ }
+ }
+
+ A_density = A_density / (M * N);
+ B_density = B_density / (N * P);
+
+ /* Don't shift too far when creating integer block matrices. */
+ max_height = 1.25 * FLINT_MIN(prec, FLINT_MAX(A_max_bits, B_max_bits)) + 192;
+
+ /* FIXME: this condition is bogus */
+ if (A_density < 0.1 && B_density < 0.1 && max_height > 1024)
+ {
+ status = gr_mat_mul_classical(C, A, B, ctx);
+ goto cleanup;
+ }
+
+ block_start = 0;
+ while (block_start < N)
+ {
+ /* Find a run of columns of A and rows of B such that the
+ bottom exponents differ by at most max_height. */
+
+ block_end = block_start + 1; /* index is exclusive block_end */
+
+ /* begin with this column of A and row of B */
+ for (i = 0; i < M; i++)
+ {
+ A_max[i] = A_min[i] = A_bot[i * N + block_start];
+ A_max[i] += (slong) A_bits[i * N + block_start];
+ }
+
+ for (i = 0; i < P; i++)
+ {
+ B_max[i] = B_min[i] = B_bot[block_start * P + i];
+ B_max[i] += (slong) B_bits[block_start * P + i];
+ }
+
+ while (block_end < N)
+ {
+ double size;
+
+ /* End block if memory would be excessive. */
+ /* Necessary? */
+ /* Should also do initial check above, if C alone is too large. */
+ size = (block_end - block_start) * M * (double) A_max_bits;
+ size += (block_end - block_start) * P * (double) B_max_bits;
+ size += (M * P) * (double) (A_max_bits + B_max_bits);
+ size /= 8.0;
+ if (size > 2e9)
+ goto blocks_built;
+
+ /* check if we can extend with column [block_end] of A */
+ for (i = 0; i < M; i++)
+ {
+ bot = A_bot[i * N + block_end];
+ /* zeros are irrelevant */
+ if (bot == WORD_MIN || A_max[i] == WORD_MIN)
+ continue;
+ top = bot + (slong) A_bits[i * N + block_end];
+ /* jump will be too big */
+ if (top > A_min[i] + max_height || bot < A_max[i] - max_height)
+ goto blocks_built;
+ }
+
+ /* check if we can extend with row [block_end] of B */
+ for (i = 0; i < P; i++)
+ {
+ bot = B_bot[block_end * P + i];
+ if (bot == WORD_MIN || B_max[i] == WORD_MIN)
+ continue;
+ top = bot + (slong) B_bits[block_end * P + i];
+ if (top > B_min[i] + max_height || bot < B_max[i] - max_height)
+ goto blocks_built;
+ }
+
+ /* second pass to update the extreme values */
+ for (i = 0; i < M; i++)
+ {
+ bot = A_bot[i * N + block_end];
+ top = bot + (slong) A_bits[i * N + block_end];
+ if (A_max[i] == WORD_MIN)
+ {
+ A_max[i] = top;
+ A_min[i] = bot;
+ }
+ else if (bot != WORD_MIN)
+ {
+ if (bot < A_min[i]) A_min[i] = bot;
+ if (top > A_max[i]) A_max[i] = top;
+ }
+ }
+
+ for (i = 0; i < P; i++)
+ {
+ bot = B_bot[block_end * P + i];
+ top = bot + (slong) B_bits[block_end * P + i];
+ if (B_max[i] == WORD_MIN)
+ {
+ B_max[i] = top;
+ B_min[i] = bot;
+ }
+ else if (bot != WORD_MIN)
+ {
+ if (bot < B_min[i]) B_min[i] = bot;
+ if (top > B_max[i]) B_max[i] = top;
+ }
+ }
+
+ block_end++;
+ }
+
+ blocks_built:
+ if (block_end - block_start < min_block_size)
+ {
+ block_end = FLINT_MIN(N, block_start + min_block_size);
+ status |= nfloat_mat_addmul_block_fallback(C, A, B, block_start, block_end, ctx);
+ }
+ else
+ {
+ status |= nfloat_mat_addmul_block_prescaled(C, A, B, block_start, block_end, A_min, B_min, ctx);
+ }
+
+ block_start = block_end;
+ }
+
+cleanup:
+ flint_free(A_bot);
+ flint_free(A_max);
+ flint_free(A_min);
+ flint_free(B_bot);
+ flint_free(B_max);
+ flint_free(B_min);
+ flint_free(A_bits);
+ flint_free(B_bits);
+
+ return status;
+}
+
+/* Minimum precision for using fixed-point arithmetic */
+
+/* TODO: for *unsigned* matrices, there is a speedup already for
+ prec = 192. Consider inlining fixed-point additions/subtractions for
+ 4 and 5 limbs to extend this to general matrices. */
+/* #define NFLOAT_MAT_MUL_FIXED_CUTOFF 192 */
+#define NFLOAT_MAT_MUL_FIXED_CUTOFF 320
+
+/* first cutoff: classical -> fixed_classical
+ second cutoff: fixed_classical -> waksman */
+static const int nfloat_mat_mul_cutoff_tab[][2] = {
+ {0, 0}, /* prec = 0 */
+ {0, 0}, /* prec = 64 */
+ {0, 0}, /* prec = 128 */
+ {32, 32}, /* prec = 192 */
+ {8, 20}, /* prec = 256 */
+ {4, 15}, /* prec = 320 */
+ {3, 10}, /* prec = 384 */
+ {3, 10}, /* prec = 448 */
+ {3, 8}, /* prec = 512 */
+ {10, 10}, /* prec = 576 */
+ {4, 5}, /* prec = 640 */
+};
+
+/* {4, 4} from this point */
+#define NFLOAT_MAT_MUL_CUTOFF_4 704
+/* {3, 3} from this point */
+#define NFLOAT_MAT_MUL_CUTOFF_3 1600
+
+int
+nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+{
+ slong cutoff1, cutoff2, dim;
+ int use_waksman = 0;
+ slong prec;
+ slong max_extra_prec;
+
+ slong m = A->r;
+ slong n = A->c;
+ slong p = B->c;
+
+ dim = FLINT_MIN(n, FLINT_MIN(m, p));
+
+ if (dim <= 2 || NFLOAT_CTX_HAS_INF_NAN(ctx))
+ return gr_mat_mul_classical(C, A, B, ctx);
+
+ if (dim <= 80)
+ {
+ prec = NFLOAT_CTX_PREC(ctx);
+
+ if (prec < NFLOAT_MAT_MUL_FIXED_CUTOFF)
+ return gr_mat_mul_classical(C, A, B, ctx);
+
+ if (prec >= NFLOAT_MAT_MUL_CUTOFF_3)
+ cutoff1 = cutoff2 = 3;
+ else if (prec >= NFLOAT_MAT_MUL_CUTOFF_4)
+ cutoff1 = cutoff2 = 4;
+ else
+ {
+ cutoff1 = nfloat_mat_mul_cutoff_tab[prec / 64][0];
+ cutoff2 = nfloat_mat_mul_cutoff_tab[prec / 64][1];
+ }
+
+ if (dim < cutoff1)
+ return gr_mat_mul_classical(C, A, B, ctx);
+
+ use_waksman = (dim >= cutoff2);
+ max_extra_prec = (prec < 768) ? 64 : prec / 4;
+
+ return _nfloat_mat_mul_fixed(C, A, B, use_waksman, max_extra_prec, ctx);
+ }
+ else
+ {
+ return nfloat_mat_mul_block(C, A, B, 70, ctx);
+ }
+}
diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c
index ccf0293288..10cb533916 100644
--- a/src/nfloat/nfloat.c
+++ b/src/nfloat/nfloat.c
@@ -98,7 +98,7 @@ nfloat_randtest(nfloat_ptr res, flint_rand_t state, gr_ctx_t ctx)
int status;
arf_init(t);
- arf_randtest(t, state, NFLOAT_CTX_PREC(ctx), 10);
+ arf_randtest(t, state, NFLOAT_CTX_PREC(ctx), n_randint(state, 2) ? 2 : 10);
status = nfloat_set_arf(res, t, ctx);
arf_clear(t);
return status;
diff --git a/src/nfloat/test/main.c b/src/nfloat/test/main.c
index 0a129221f1..8520994f89 100644
--- a/src/nfloat/test/main.c
+++ b/src/nfloat/test/main.c
@@ -13,6 +13,7 @@
#include "t-add_sub_n.c"
#include "t-addmul_submul.c"
+#include "t-mat_mul.c"
#include "t-nfloat.c"
#include "t-nfloat_complex.c"
@@ -22,6 +23,7 @@ test_struct tests[] =
{
TEST_FUNCTION(add_sub_n),
TEST_FUNCTION(addmul_submul),
+ TEST_FUNCTION(mat_mul),
TEST_FUNCTION(nfloat),
TEST_FUNCTION(nfloat_complex),
};
diff --git a/src/nfloat/test/t-mat_mul.c b/src/nfloat/test/t-mat_mul.c
new file mode 100644
index 0000000000..dad6ad625c
--- /dev/null
+++ b/src/nfloat/test/t-mat_mul.c
@@ -0,0 +1,103 @@
+/*
+ Copyright (C) 2024 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "test_helpers.h"
+#include "gr.h"
+#include "gr_mat.h"
+#include "nfloat.h"
+
+int
+nfloat_mat_mul_block1(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+{
+ return nfloat_mat_mul_block(C, A, B, 1, ctx);
+}
+
+TEST_FUNCTION_START(mat_mul, state)
+{
+ gr_ctx_t ctx;
+ slong prec;
+ slong iter;
+ gr_ptr tol;
+
+ for (iter = 0; iter < 10 * flint_test_multiplier(); iter++)
+ {
+ if (n_randint(state, 5))
+ prec = FLINT_BITS * (1 + n_randint(state, 4));
+ else
+ prec = FLINT_BITS * (1 + n_randint(state, NFLOAT_MAX_LIMBS));
+
+ nfloat_ctx_init(ctx, prec, 0);
+
+ tol = gr_heap_init(ctx);
+ GR_MUST_SUCCEED(gr_one(tol, ctx));
+ GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 2, ctx));
+
+ gr_mat_test_approx_mul_max_norm(
+ (gr_method_mat_binary_op) nfloat_mat_mul_waksman,
+ tol, state, (prec <= 256) ? 10 : 1, 10, ctx);
+
+ gr_mat_test_approx_mul_max_norm(
+ (gr_method_mat_binary_op) nfloat_mat_mul_block1,
+ tol, state, (prec <= 256) ? 10 : 1,
+ (prec <= 256) ? 40 : 20, ctx);
+
+ gr_mat_test_approx_mul_max_norm(
+ (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical,
+ tol, state, (prec <= 256) ? 10 : 1,
+ (prec <= 256) ? 40 : 20, ctx);
+
+ if (n_randint(state, 4) == 0)
+ gr_mat_test_approx_mul_max_norm(
+ (gr_method_mat_binary_op) nfloat_mat_mul,
+ tol, state, 1, 120, ctx);
+
+ gr_heap_clear(tol, ctx);
+ gr_ctx_clear(ctx);
+ }
+
+ for (iter = 0; iter < 100 * flint_test_multiplier(); iter++)
+ {
+ if (n_randint(state, 5))
+ prec = FLINT_BITS * (1 + n_randint(state, 4));
+ else
+ prec = FLINT_BITS * (1 + n_randint(state, NFLOAT_MAX_LIMBS));
+
+ nfloat_ctx_init(ctx, prec, 0);
+
+ tol = gr_heap_init(ctx);
+ GR_MUST_SUCCEED(gr_one(tol, ctx));
+ GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 6, ctx));
+
+ gr_mat_test_approx_mul_pos_entrywise_accurate(
+ (gr_method_mat_binary_op) nfloat_mat_mul_waksman,
+ tol, state, (prec <= 256) ? 10 : 1, 10, ctx);
+
+ gr_mat_test_approx_mul_pos_entrywise_accurate(
+ (gr_method_mat_binary_op) nfloat_mat_mul_block1,
+ tol, state, (prec <= 256) ? 10 : 1,
+ (prec <= 256) ? 40 : 20, ctx);
+
+ gr_mat_test_approx_mul_pos_entrywise_accurate(
+ (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical,
+ tol, state, (prec <= 256) ? 10 : 1,
+ (prec <= 256) ? 40 : 20, ctx);
+
+ if (n_randint(state, 4) == 0)
+ gr_mat_test_approx_mul_pos_entrywise_accurate(
+ (gr_method_mat_binary_op) nfloat_mat_mul,
+ tol, state, 1, 120, ctx);
+
+ gr_heap_clear(tol, ctx);
+ gr_ctx_clear(ctx);
+ }
+
+ TEST_FUNCTION_END(state);
+}
diff --git a/src/python/flint_ctypes.py b/src/python/flint_ctypes.py
index d2641b32e3..f12975d88c 100644
--- a/src/python/flint_ctypes.py
+++ b/src/python/flint_ctypes.py
@@ -1196,6 +1196,41 @@ def csgn(ctx, x):
def arg(ctx, x):
return ctx._unary_op(x, libgr.gr_arg, "arg($x)")
+ def min(ctx, x, y):
+ """
+ >>> QQ.min(QQ(1)/3, QQ(1)/4)
+ 1/4
+ >>> RR.min(3, RR.pi())
+ 3.000000000000000
+ >>> RR.min(RR("11 +/- 1"), RR("12 +/- 3"))
+ [1e+1 +/- 2.01]
+ >>> CC.min(2, 3)
+ 2.000000000000000
+ >>> CC.min(2, CC.i())
+ Traceback (most recent call last):
+ ...
+ FlintUnableError: failed to compute min(x, y) in {Complex numbers (acb, prec = 53)} for {x = 2.000000000000000}, {y = 1.000000000000000*I}
+
+ """
+ return ctx._binary_op(x, y, libgr.gr_min, "min($x, $y)")
+
+ def max(ctx, x, y):
+ """
+ >>> QQ.max(QQ(1)/3, QQ(1)/4)
+ 1/3
+ >>> RR.max(3, RR.pi())
+ [3.141592653589793 +/- 5.61e-16]
+ >>> RR.max(RR("10 +/- 1"), RR("9 +/- 3"))
+ [1e+1 +/- 2.01]
+ >>> CC.max(2, 3)
+ 3.000000000000000
+ >>> CC.max(2, CC.i())
+ Traceback (most recent call last):
+ ...
+ FlintUnableError: failed to compute max(x, y) in {Complex numbers (acb, prec = 53)} for {x = 2.000000000000000}, {y = 1.000000000000000*I}
+ """
+ return ctx._binary_op(x, y, libgr.gr_max, "max($x, $y)")
+
def inf(ctx):
"""
Positive infinity (for extended number sets which support it).
@@ -5474,6 +5509,60 @@ def __setitem__(self, ij, v):
if status & GR_DOMAIN: raise ValueError
return x
+ def norm_max(self):
+ """
+ >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_max()
+ 9.000000000000000
+ """
+ element_ring = self.parent()._element_ring
+ res = element_ring()
+ status = libgr.gr_mat_norm_max(res._ref, self._ref, element_ring._ref)
+ if status:
+ if status & GR_UNABLE: raise NotImplementedError
+ if status & GR_DOMAIN: raise ValueError
+ return res
+
+
+ def norm_1(self):
+ """
+ >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_1()
+ 18.00000000000000
+ """
+ element_ring = self.parent()._element_ring
+ res = element_ring()
+ status = libgr.gr_mat_norm_1(res._ref, self._ref, element_ring._ref)
+ if status:
+ if status & GR_UNABLE: raise NotImplementedError
+ if status & GR_DOMAIN: raise ValueError
+ return res
+
+ def norm_inf(self):
+ """
+ >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_inf()
+ 24.00000000000000
+ """
+ element_ring = self.parent()._element_ring
+ res = element_ring()
+ status = libgr.gr_mat_norm_inf(res._ref, self._ref, element_ring._ref)
+ if status:
+ if status & GR_UNABLE: raise NotImplementedError
+ if status & GR_DOMAIN: raise ValueError
+ return res
+
+ def norm_frobenius(self):
+ """
+ >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_frobenius()
+ [16.88194301613413 +/- 3.73e-15]
+ """
+ element_ring = self.parent()._element_ring
+ res = element_ring()
+ status = libgr.gr_mat_norm_frobenius(res._ref, self._ref, element_ring._ref)
+ if status:
+ if status & GR_UNABLE: raise NotImplementedError
+ if status & GR_DOMAIN: raise ValueError
+ return res
+
+
def nullspace(self):
"""
Right kernel (nullspace) of this matrix.