diff --git a/doc/source/gr.rst b/doc/source/gr.rst index 15183b3195..79992c8a4d 100644 --- a/doc/source/gr.rst +++ b/doc/source/gr.rst @@ -844,6 +844,23 @@ Ordering methods of *x* is less than, equal or greater than the absolute value of *y*. This may return ``GR_DOMAIN`` if the ring is not an ordered ring. +.. function:: truth_t gr_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + truth_t gr_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + truth_t gr_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + truth_t gr_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + truth_t gr_abs_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + truth_t gr_abs_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + truth_t gr_abs_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + truth_t gr_abs_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + + Wrappers of ``gr_cmp`` and ``gr_cmpabs`` returning truth values + for the comparison operations ``<=``, ``<``, ``>=``, ``>``. + +.. function:: int gr_min(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + int gr_max(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) + + Minimum and maximum value. + Enclosure and interval methods ........................................................................ diff --git a/doc/source/gr_mat.rst b/doc/source/gr_mat.rst index 9a2ac83670..93d8d2b9fa 100644 --- a/doc/source/gr_mat.rst +++ b/doc/source/gr_mat.rst @@ -154,6 +154,7 @@ Assignment and special values .. function:: int gr_mat_set(gr_mat_t res, const gr_mat_t mat, gr_ctx_t ctx) int gr_mat_set_fmpz_mat(gr_mat_t res, const fmpz_mat_t mat, gr_ctx_t ctx) int gr_mat_set_fmpq_mat(gr_mat_t res, const fmpq_mat_t mat, gr_ctx_t ctx) + int gr_mat_set_gr_mat_other(gr_mat_t res, const gr_mat_t mat, gr_ctx_t mat_ctx, gr_ctx_t ctx) Sets *res* to the value of *mat*. @@ -213,6 +214,58 @@ Basic row, column and entry operations This predicate is always decidable (even if the underlying ring is not computable), returning ``T_TRUE`` or ``T_FALSE``. +Entrywise operations +------------------------------------------------------------------------------- + +.. function:: int gr_mat_entrywise_unary_op(gr_mat_t res, gr_method_unary_op f, const gr_mat_t mat, gr_ctx_t ctx) + + Sets *res* to the application of the function *f* to the + entries of matrix *mat*. Returns ``GR_DOMAIN`` if the matrix dimensions do not match. + +.. function:: int gr_mat_entrywise_binary_op(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx) + + Sets *res* to the application of the function *f* + to the entries of *mat1* as first argument and the entries of *mat2* + as second argument. + Returns ``GR_DOMAIN`` if the matrix dimensions do not match. + +.. function:: int gr_mat_entrywise_binary_op_scalar(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat, gr_srcptr c, gr_ctx_t ctx) + + Sets *res* to the application of the function *f* + to the entries of *mat* as first argument and the scalar *c* + as second argument. + Returns ``GR_DOMAIN`` if the matrix dimensions do not match. + +.. function:: truth_t gr_mat_entrywise_unary_predicate_all(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx) + truth_t gr_mat_entrywise_unary_predicate_any(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx) + + Returns whether the predicate *f* is true for all entries, + respectively for any entry, in the matrix *mat*. + +.. function:: truth_t gr_mat_entrywise_binary_predicate_all(gr_method_binary_predicate f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx) + + Returns whether the binary predicate *f* is true for all entries + in *mat1* paired with the corresponding entries in *mat2*. + Returns ``T_FALSE`` if the matrix dimensions are not compatible. + +Norms +------------------------------------------------------------------------------- + +.. function:: int gr_mat_norm_max(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) + + Max norm: `\max_{i,j} |a_{i,j}|`. + +.. function:: int gr_mat_norm_1(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) + + 1-norm (largest absolute column sum): `\max_{1\le j \le n} \sum_{i=1}^m |a_{i,j}|`. + +.. function:: int gr_mat_norm_inf(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) + + Infinity-norm (largest absolute row sum): `\max_{1\le i \le m} \sum_{j=1}^n |a_{i,j}|`. + +.. function:: int gr_mat_norm_frobenius(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) + + Frobenius norm: `\sqrt{\sum_{i,j} |a_{i,j}|^2}`. Arithmetic ------------------------------------------------------------------------------- @@ -802,6 +855,19 @@ on each test iteration, otherwise the given ring is tested. Tests the given function ``solve_impl`` for correctness as an implementation of :func:`gr_mat_nonsingular_solve_tril` / :func:`gr_mat_nonsingular_solve_triu`. +.. function:: void gr_mat_test_approx_mul_max_norm(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx) + + Tests the given implementation of matrix multiplication for accuracy + over an approximate numerical ring by checking that + `|C-AB| \le |A||B| rel\_tol` holds in the max norm, + using classical multiplication for reference. + +.. function:: void gr_mat_test_approx_mul_pos_entrywise_accurate(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx) + + Tests the given implementation of matrix multiplication for accuracy + over an approximate numerical ring by generating nonnegative matrices + and checking that the entrywise relative error compared to + classical multiplication does not exceed *rel_tol*. .. raw:: latex diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index 3f1d82e391..d708a16c6a 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -314,6 +314,16 @@ code for reduced overhead. .. function:: int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) +Matrix functions +------------------------------------------------------------------------------- + +.. function:: int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) + int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) + int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx) + int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) + + Different implementations of matrix multiplication. + Internal functions ------------------------------------------------------------------------------- diff --git a/src/gr.h b/src/gr.h index 728366c7ac..63cb0d7fcd 100644 --- a/src/gr.h +++ b/src/gr.h @@ -1109,6 +1109,25 @@ GR_INLINE WARN_UNUSED_RESULT int gr_cmpabs(int * res, gr_srcptr x, gr_srcptr y, GR_INLINE WARN_UNUSED_RESULT int gr_cmp_other(int * res, gr_srcptr x, gr_srcptr y, gr_ctx_t y_ctx, gr_ctx_t ctx) { return GR_BINARY_OP_OTHER_GET_INT(ctx, CMP_OTHER)(res, x, y, y_ctx, ctx); } GR_INLINE WARN_UNUSED_RESULT int gr_cmpabs_other(int * res, gr_srcptr x, gr_srcptr y, gr_ctx_t y_ctx, gr_ctx_t ctx) { return GR_BINARY_OP_OTHER_GET_INT(ctx, CMPABS_OTHER)(res, x, y, y_ctx, ctx); } +#define __GR_CMP(cfun, expr) \ + int cmp; \ + if ((cfun)(&cmp, x, y, ctx) != GR_SUCCESS) \ + return T_UNKNOWN; \ + return (expr) ? T_TRUE : T_FALSE; \ + +GR_INLINE WARN_UNUSED_RESULT truth_t gr_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp <= 0) } +GR_INLINE WARN_UNUSED_RESULT truth_t gr_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp < 0) } +GR_INLINE WARN_UNUSED_RESULT truth_t gr_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp >= 0) } +GR_INLINE WARN_UNUSED_RESULT truth_t gr_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmp, cmp > 0) } + +GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_le(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp <= 0) } +GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_lt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp < 0) } +GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_ge(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp >= 0) } +GR_INLINE WARN_UNUSED_RESULT truth_t gr_abs_gt(gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { __GR_CMP(gr_cmpabs, cmp > 0) } + +GR_INLINE WARN_UNUSED_RESULT int gr_min(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { return GR_BINARY_OP(ctx, MIN)(res, x, y, ctx); } +GR_INLINE WARN_UNUSED_RESULT int gr_max(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) { return GR_BINARY_OP(ctx, MAX)(res, x, y, ctx); } + GR_INLINE WARN_UNUSED_RESULT int gr_gen(gr_ptr res, gr_ctx_t ctx) { return GR_CONSTANT_OP(ctx, GEN)(res, ctx); } GR_INLINE WARN_UNUSED_RESULT int gr_gens(gr_vec_t res, gr_ctx_t ctx) { return GR_VEC_CTX_OP(ctx, GENS)(res, ctx); } GR_INLINE WARN_UNUSED_RESULT int gr_gens_recursive(gr_vec_t res, gr_ctx_t ctx) { return GR_VEC_CTX_OP(ctx, GENS_RECURSIVE)(res, ctx); } diff --git a/src/gr/acb.c b/src/gr/acb.c index 3f7cb4e08a..0a14fbd78e 100644 --- a/src/gr/acb.c +++ b/src/gr/acb.c @@ -1004,6 +1004,32 @@ _gr_acb_cmpabs(int * res, const acb_t x, const acb_t y, const gr_ctx_t ctx) return _gr_acb_cmp(res, t, u, ctx); } +int +_gr_acb_min(acb_t res, const acb_t x, const acb_t y, const gr_ctx_t ctx) +{ + if (arb_is_zero(acb_imagref(x)) && arb_is_zero(acb_imagref(y))) + { + arb_min(acb_realref(res), acb_realref(x), acb_realref(y), ACB_CTX_PREC(ctx)); + arb_zero(acb_imagref(res)); + return GR_SUCCESS; + } + else + return GR_UNABLE; +} + +int +_gr_acb_max(acb_t res, const acb_t x, const acb_t y, const gr_ctx_t ctx) +{ + if (arb_is_zero(acb_imagref(x)) && arb_is_zero(acb_imagref(y))) + { + arb_max(acb_realref(res), acb_realref(x), acb_realref(y), ACB_CTX_PREC(ctx)); + arb_zero(acb_imagref(res)); + return GR_SUCCESS; + } + else + return GR_UNABLE; +} + int _gr_acb_pi(acb_t res, const gr_ctx_t ctx) { @@ -1330,13 +1356,7 @@ _gr_acb_gamma_fmpq(acb_t res, const fmpq_t x, const gr_ctx_t ctx) } } - -int -_gr_acb_rgamma(acb_t res, const acb_t x, const gr_ctx_t ctx) -{ - acb_rgamma(res, x, ACB_CTX_PREC(ctx)); - return GR_SUCCESS; -} +DEF_FUNC(rgamma) int _gr_acb_lgamma(acb_t res, const acb_t x, const gr_ctx_t ctx) @@ -1642,12 +1662,7 @@ int _gr_acb_stieltjes(acb_t res, const fmpz_t n, const acb_t a, const gr_ctx_t c return acb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; } -int -_gr_acb_dirichlet_eta(acb_t res, const acb_t x, const gr_ctx_t ctx) -{ - acb_dirichlet_eta(res, x, ACB_CTX_PREC(ctx)); - return GR_SUCCESS; -} +DEF_FUNC(dirichlet_eta) /* todo int @@ -2220,6 +2235,8 @@ gr_method_tab_input _acb_methods_input[] = {GR_METHOD_ARG, (gr_funcptr) _gr_acb_arg}, {GR_METHOD_CMP, (gr_funcptr) _gr_acb_cmp}, {GR_METHOD_CMPABS, (gr_funcptr) _gr_acb_cmpabs}, + {GR_METHOD_MIN, (gr_funcptr) _gr_acb_min}, + {GR_METHOD_MAX, (gr_funcptr) _gr_acb_max}, {GR_METHOD_PI, (gr_funcptr) _gr_acb_pi}, {GR_METHOD_EXP, (gr_funcptr) _gr_acb_exp}, {GR_METHOD_EXPM1, (gr_funcptr) _gr_acb_expm1}, diff --git a/src/gr/arb.c b/src/gr/arb.c index 6f315dd685..cb9fb88f58 100644 --- a/src/gr/arb.c +++ b/src/gr/arb.c @@ -33,6 +33,55 @@ gr_arb_ctx; #define ARB_CTX_PREC(ring_ctx) (((gr_arb_ctx *)((ring_ctx)))->prec) +#define DEF_FUNC(fname) \ +int \ +_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \ +{ \ + arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \ + return GR_SUCCESS; \ +} \ + +#define DEF_FUNC_NOPREC(fname) \ +int \ +_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \ +{ \ + arb_ ## fname(res, x); \ + return GR_SUCCESS; \ +} \ + + +#define DEF_2FUNC(fname) \ +int \ +_gr_arb_ ## fname(arb_t res1, arb_t res2, const arb_t x, const gr_ctx_t ctx) \ +{ \ + arb_ ## fname(res1, res2, x, ARB_CTX_PREC(ctx)); \ + return GR_SUCCESS; \ +} \ + +#define DEF_FUNC2(fname) \ +int \ +_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \ +{ \ + arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \ + return GR_SUCCESS; \ +} \ + +#define DEF_FUNC_SING(fname) \ +int \ +_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \ +{ \ + arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \ + return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \ +} \ + +#define DEF_FUNC2_SING(fname) \ +int \ +_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \ +{ \ + arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \ + return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \ +} \ + int _gr_arb_ctx_set_real_prec(gr_ctx_t ctx, slong prec) { prec = FLINT_MAX(prec, 2); @@ -407,26 +456,13 @@ _gr_arb_equal(const arb_t x, const arb_t y, const gr_ctx_t ctx) return T_FALSE; } -int -_gr_arb_set(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_set(res, x); - return GR_SUCCESS; -} - -int -_gr_arb_neg(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_neg(res, x); - return GR_SUCCESS; -} - -int -_gr_arb_add(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) -{ - arb_add(res, x, y, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} +DEF_FUNC_NOPREC(set) +DEF_FUNC_NOPREC(neg) +DEF_FUNC2(add) +DEF_FUNC2(sub) +DEF_FUNC2(addmul) +DEF_FUNC2(submul) +DEF_FUNC(sqr) int _gr_arb_add_si(arb_t res, const arb_t x, slong y, const gr_ctx_t ctx) @@ -449,13 +485,6 @@ _gr_arb_add_fmpz(arb_t res, const arb_t x, const fmpz_t y, const gr_ctx_t ctx) return GR_SUCCESS; } -int -_gr_arb_sub(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) -{ - arb_sub(res, x, y, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - int _gr_arb_sub_si(arb_t res, const arb_t x, slong y, const gr_ctx_t ctx) { @@ -505,20 +534,6 @@ _gr_arb_mul_fmpz(arb_t res, const arb_t x, const fmpz_t y, const gr_ctx_t ctx) return GR_SUCCESS; } -int -_gr_arb_addmul(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) -{ - arb_addmul(res, x, y, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - -int -_gr_arb_submul(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) -{ - arb_submul(res, x, y, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - int _gr_arb_mul_two(arb_t res, const arb_t x, const gr_ctx_t ctx) { @@ -526,13 +541,6 @@ _gr_arb_mul_two(arb_t res, const arb_t x, const gr_ctx_t ctx) return GR_SUCCESS; } -int -_gr_arb_sqr(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_sqr(res, x, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - int _gr_arb_mul_2exp_si(arb_t res, const arb_t x, slong y, const gr_ctx_t ctx) { @@ -810,40 +818,12 @@ _gr_arb_rsqrt(arb_t res, const arb_t x, const gr_ctx_t ctx) } } -int -_gr_arb_floor(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_floor(res, x, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - -int -_gr_arb_ceil(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_ceil(res, x, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - -int -_gr_arb_trunc(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_trunc(res, x, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - -int -_gr_arb_nint(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_nint(res, x, ARB_CTX_PREC(ctx)); - return GR_SUCCESS; -} - -int -_gr_arb_abs(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_abs(res, x); - return GR_SUCCESS; -} +DEF_FUNC(floor) +DEF_FUNC(ceil) +DEF_FUNC(trunc) +DEF_FUNC(nint) +DEF_FUNC_NOPREC(abs) +DEF_FUNC_NOPREC(sgn) int _gr_arb_conj(arb_t res, const arb_t x, const gr_ctx_t ctx) @@ -859,13 +839,6 @@ _gr_arb_im(arb_t res, const arb_t x, const gr_ctx_t ctx) return GR_SUCCESS; } -int -_gr_arb_sgn(arb_t res, const arb_t x, const gr_ctx_t ctx) -{ - arb_sgn(res, x); - return GR_SUCCESS; -} - int _gr_arb_arg(arb_t res, const arb_t x, const gr_ctx_t ctx) { @@ -956,47 +929,6 @@ _gr_arb_glaisher(arb_t res, const gr_ctx_t ctx) return GR_SUCCESS; } -#define DEF_FUNC(fname) \ -int \ -_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \ -{ \ - arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \ - return GR_SUCCESS; \ -} \ - -#define DEF_2FUNC(fname) \ -int \ -_gr_arb_ ## fname(arb_t res1, arb_t res2, const arb_t x, const gr_ctx_t ctx) \ -{ \ - arb_ ## fname(res1, res2, x, ARB_CTX_PREC(ctx)); \ - return GR_SUCCESS; \ -} \ - -#define DEF_FUNC2(fname) \ -int \ -_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \ -{ \ - arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \ - return GR_SUCCESS; \ -} \ - -#define DEF_FUNC_SING(fname) \ -int \ -_gr_arb_ ## fname(arb_t res, const arb_t x, const gr_ctx_t ctx) \ -{ \ - arb_ ## fname(res, x, ARB_CTX_PREC(ctx)); \ - return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \ -} \ - -#define DEF_FUNC2_SING(fname) \ -int \ -_gr_arb_ ## fname(arb_t res, const arb_t x, const arb_t y, const gr_ctx_t ctx) \ -{ \ - arb_ ## fname(res, x, y, ARB_CTX_PREC(ctx)); \ - return arb_is_finite(res) ? GR_SUCCESS : GR_UNABLE; \ -} \ - - DEF_FUNC(exp) DEF_FUNC(expm1) DEF_FUNC_SING(log1p) @@ -1016,6 +948,9 @@ _gr_arb_log(arb_t res, const arb_t x, const gr_ctx_t ctx) return GR_UNABLE; } +DEF_FUNC2(min) +DEF_FUNC2(max) + DEF_FUNC(sin) DEF_FUNC(cos) DEF_2FUNC(sin_cos) @@ -1863,6 +1798,8 @@ gr_method_tab_input _arb_methods_input[] = {GR_METHOD_ARG, (gr_funcptr) _gr_arb_arg}, {GR_METHOD_CMP, (gr_funcptr) _gr_arb_cmp}, {GR_METHOD_CMPABS, (gr_funcptr) _gr_arb_cmpabs}, + {GR_METHOD_MIN, (gr_funcptr) _gr_arb_min}, + {GR_METHOD_MAX, (gr_funcptr) _gr_arb_max}, {GR_METHOD_I, (gr_funcptr) gr_not_in_domain}, {GR_METHOD_PI, (gr_funcptr) _gr_arb_pi}, {GR_METHOD_EULER, (gr_funcptr) _gr_arb_euler}, diff --git a/src/gr/matrix.c b/src/gr/matrix.c index 9a0e6aab4c..ba1af2ad0a 100644 --- a/src/gr/matrix.c +++ b/src/gr/matrix.c @@ -221,9 +221,6 @@ matrix_set_other(gr_mat_t res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx) else if (x_ctx->which_ring == GR_CTX_GR_MAT) { const gr_mat_struct * xmat = x; - slong i, j; - int status; - slong sz, xsz; if (res->r != xmat->r || res->c != xmat->c) { @@ -233,24 +230,8 @@ matrix_set_other(gr_mat_t res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx) return GR_DOMAIN; } - sz = MATRIX_CTX(ctx)->base_ring->sizeof_elem; - xsz = MATRIX_CTX(x_ctx)->base_ring->sizeof_elem; - - for (i = 0; i < xmat->r; i++) - { - for (j = 0; j < xmat->c; j++) - { - status = gr_set_other(GR_MAT_ENTRY(res, i, j, sz), - GR_MAT_ENTRY(xmat, i, j, xsz), - MATRIX_CTX(x_ctx)->base_ring, + return gr_mat_set_gr_mat_other(res, xmat, MATRIX_CTX(x_ctx)->base_ring, MATRIX_CTX(ctx)->base_ring); - - if (status != GR_SUCCESS) - return status; - } - } - - return GR_SUCCESS; } else { diff --git a/src/gr_generic/generic.c b/src/gr_generic/generic.c index 7b343c7828..0908526a1f 100644 --- a/src/gr_generic/generic.c +++ b/src/gr_generic/generic.c @@ -1317,6 +1317,34 @@ gr_generic_cmpabs_other(int * res, gr_srcptr x, gr_srcptr y, gr_ctx_t y_ctx, gr_ return status; } +int +gr_generic_min(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) +{ + int cmp; + + if (gr_cmp(&cmp, x, y, ctx) != GR_SUCCESS) + return GR_UNABLE; + + if (cmp <= 0) + return gr_set(res, x, ctx); + else + return gr_set(res, y, ctx); +} + +int +gr_generic_max(gr_ptr res, gr_srcptr x, gr_srcptr y, gr_ctx_t ctx) +{ + int cmp; + + if (gr_cmp(&cmp, x, y, ctx) != GR_SUCCESS) + return GR_UNABLE; + + if (cmp >= 0) + return gr_set(res, x, ctx); + else + return gr_set(res, y, ctx); +} + int gr_generic_bernoulli_ui(gr_ptr res, ulong n, gr_ctx_t ctx) { @@ -2664,6 +2692,8 @@ const gr_method_tab_input _gr_generic_methods[] = {GR_METHOD_CMPABS, (gr_funcptr) gr_generic_cmpabs}, {GR_METHOD_CMP_OTHER, (gr_funcptr) gr_generic_cmp_other}, {GR_METHOD_CMPABS_OTHER, (gr_funcptr) gr_generic_cmpabs_other}, + {GR_METHOD_MIN, (gr_funcptr) gr_generic_min}, + {GR_METHOD_MAX, (gr_funcptr) gr_generic_max}, {GR_METHOD_EXP, (gr_funcptr) gr_generic_exp}, {GR_METHOD_EXPM1, (gr_funcptr) gr_generic_expm1}, diff --git a/src/gr_mat.h b/src/gr_mat.h index c8c722a4cc..68b36147f1 100644 --- a/src/gr_mat.h +++ b/src/gr_mat.h @@ -118,6 +118,17 @@ WARN_UNUSED_RESULT int gr_mat_set_fmpq(gr_mat_t res, const fmpq_t v, gr_ctx_t ct WARN_UNUSED_RESULT int gr_mat_set_fmpz_mat(gr_mat_t res, const fmpz_mat_t mat, gr_ctx_t ctx); WARN_UNUSED_RESULT int gr_mat_set_fmpq_mat(gr_mat_t res, const fmpq_mat_t mat, gr_ctx_t ctx); +WARN_UNUSED_RESULT int gr_mat_set_gr_mat_other(gr_mat_t res, const gr_mat_t mat, gr_ctx_t mat_ctx, gr_ctx_t ctx); + +/* fixme: needed for method typedefs */ +#ifdef GR_H +WARN_UNUSED_RESULT int gr_mat_entrywise_unary_op(gr_mat_t res, gr_method_unary_op f, const gr_mat_t mat, gr_ctx_t ctx); +WARN_UNUSED_RESULT int gr_mat_entrywise_binary_op(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx); +WARN_UNUSED_RESULT int gr_mat_entrywise_binary_op_scalar(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat, gr_srcptr c, gr_ctx_t ctx); +truth_t gr_mat_entrywise_unary_predicate_all(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx); +truth_t gr_mat_entrywise_unary_predicate_any(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx); +truth_t gr_mat_entrywise_binary_predicate_all(gr_method_binary_predicate f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx); +#endif WARN_UNUSED_RESULT int gr_mat_neg(gr_mat_t res, const gr_mat_t mat, gr_ctx_t ctx); WARN_UNUSED_RESULT int gr_mat_swap_entrywise(gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx); @@ -279,6 +290,11 @@ WARN_UNUSED_RESULT int gr_mat_exp(gr_mat_t res, const gr_mat_t A, gr_ctx_t ctx); WARN_UNUSED_RESULT int gr_mat_log_jordan(gr_mat_t res, const gr_mat_t A, gr_ctx_t ctx); WARN_UNUSED_RESULT int gr_mat_log(gr_mat_t res, const gr_mat_t A, gr_ctx_t ctx); +WARN_UNUSED_RESULT int gr_mat_norm_max(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx); +WARN_UNUSED_RESULT int gr_mat_norm_1(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx); +WARN_UNUSED_RESULT int gr_mat_norm_inf(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx); +WARN_UNUSED_RESULT int gr_mat_norm_frobenius(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx); + /* Test functions */ void gr_mat_test_mul(gr_method_mat_binary_op mul_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx); @@ -286,6 +302,8 @@ void gr_mat_test_lu(gr_method_mat_lu_op lu_impl, flint_rand_t state, slong iters void gr_mat_test_det(gr_method_mat_unary_op_get_scalar det_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx); void gr_mat_test_nonsingular_solve_tril(gr_method_mat_binary_op_with_flag solve_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx); void gr_mat_test_nonsingular_solve_triu(gr_method_mat_binary_op_with_flag solve_impl, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx); +void gr_mat_test_approx_mul_max_norm(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx); +void gr_mat_test_approx_mul_pos_entrywise_accurate(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx); #ifdef __cplusplus } diff --git a/src/gr_mat/entrywise.c b/src/gr_mat/entrywise.c new file mode 100644 index 0000000000..d5d66be260 --- /dev/null +++ b/src/gr_mat/entrywise.c @@ -0,0 +1,148 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "gr.h" +#include "gr_mat.h" + +int +gr_mat_entrywise_unary_op(gr_mat_t res, gr_method_unary_op f, const gr_mat_t mat, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx)) + return GR_DOMAIN; + + for (i = 0; i < R; i++) + for (j = 0; j < C; j++) + status |= f(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat, i, j, sz), ctx); + + return status; +} + +int +gr_mat_entrywise_binary_op(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + + R = gr_mat_nrows(mat1, ctx); + C = gr_mat_ncols(mat1, ctx); + + if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx) || R != gr_mat_nrows(mat2, ctx) || C != gr_mat_ncols(mat2, ctx)) + return GR_DOMAIN; + + for (i = 0; i < R; i++) + for (j = 0; j < C; j++) + status |= f(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat1, i, j, sz), GR_MAT_ENTRY(mat2, i, j, sz), ctx); + + return status; +} + +int +gr_mat_entrywise_binary_op_scalar(gr_mat_t res, gr_method_binary_op f, const gr_mat_t mat, gr_srcptr c, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx)) + return GR_DOMAIN; + + for (i = 0; i < R; i++) + for (j = 0; j < C; j++) + status |= f(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat, i, j, sz), c, ctx); + + return status; +} + +truth_t +gr_mat_entrywise_unary_predicate_all(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + truth_t val, ans = T_TRUE; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + for (i = 0; i < R; i++) + { + for (j = 0; j < C; j++) + { + val = f(GR_MAT_ENTRY(mat, i, j, sz), ctx); + if (val == T_FALSE) + return T_FALSE; + ans = truth_and(ans, val); + } + } + + return ans; +} + +truth_t +gr_mat_entrywise_unary_predicate_any(gr_method_unary_predicate f, const gr_mat_t mat, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + truth_t val, ans = T_FALSE; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + for (i = 0; i < R; i++) + { + for (j = 0; j < C; j++) + { + val = f(GR_MAT_ENTRY(mat, i, j, sz), ctx); + if (val == T_TRUE) + return T_TRUE; + ans = truth_or(ans, val); + } + } + + return ans; +} + +truth_t +gr_mat_entrywise_binary_predicate_all(gr_method_binary_predicate f, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + truth_t val, ans = T_TRUE; + + R = gr_mat_nrows(mat1, ctx); + C = gr_mat_ncols(mat1, ctx); + + if (R != gr_mat_nrows(mat2, ctx) || C != gr_mat_ncols(mat2, ctx)) + return T_FALSE; + + for (i = 0; i < R; i++) + { + for (j = 0; j < C; j++) + { + val = f(GR_MAT_ENTRY(mat1, i, j, sz), GR_MAT_ENTRY(mat2, i, j, sz), ctx); + if (val == T_FALSE) + return T_FALSE; + ans = truth_and(ans, val); + } + } + + return ans; +} diff --git a/src/gr_mat/norm.c b/src/gr_mat/norm.c new file mode 100644 index 0000000000..c5cc7abdad --- /dev/null +++ b/src/gr_mat/norm.c @@ -0,0 +1,167 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "gr.h" +#include "gr_mat.h" + +/* todo: allow overloading the following methods (or at least use + vector functions) + todo: quick bound versions */ + +int +gr_mat_norm_max(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + gr_ptr t; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + if (R == 0 || C == 0) + return gr_zero(res, ctx); + + GR_TMP_INIT(t, ctx); + + for (i = 0; i < R; i++) + { + for (j = 0; j < C; j++) + { + if (i == 0 && j == 0) + status |= gr_abs(res, GR_MAT_ENTRY(mat, i, j, sz), ctx); + else + { + status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx); + status |= gr_max(res, res, t, ctx); + } + } + } + + GR_TMP_CLEAR(t, ctx); + + return status; +} + +int +gr_mat_norm_1(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + gr_ptr s, t; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + if (R == 0 || C == 0) + return gr_zero(res, ctx); + + GR_TMP_INIT2(s, t, ctx); + + for (j = 0; j < C; j++) + { + for (i = 0; i < R; i++) + { + if (i == 0) + status |= gr_abs(s, GR_MAT_ENTRY(mat, i, j, sz), ctx); + else + { + status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx); + status |= gr_add(s, s, t, ctx); + } + } + + if (j == 0) + gr_swap(res, s, ctx); + else + status |= gr_max(res, res, s, ctx); + } + + GR_TMP_CLEAR2(s, t, ctx); + + return status; +} + +int +gr_mat_norm_inf(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + gr_ptr s, t; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + if (R == 0 || C == 0) + return gr_zero(res, ctx); + + GR_TMP_INIT2(s, t, ctx); + + for (i = 0; i < R; i++) + { + for (j = 0; j < C; j++) + { + if (j == 0) + status |= gr_abs(s, GR_MAT_ENTRY(mat, i, j, sz), ctx); + else + { + status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx); + status |= gr_add(s, s, t, ctx); + } + } + + if (i == 0) + gr_swap(res, s, ctx); + else + status |= gr_max(res, res, s, ctx); + } + + GR_TMP_CLEAR2(s, t, ctx); + + return status; +} + +int +gr_mat_norm_frobenius(gr_ptr res, const gr_mat_t mat, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + gr_ptr t; + + R = gr_mat_nrows(mat, ctx); + C = gr_mat_ncols(mat, ctx); + + if (R == 0 || C == 0) + return gr_zero(res, ctx); + + GR_TMP_INIT(t, ctx); + + status |= gr_zero(res, ctx); + + for (i = 0; i < R; i++) + { + for (j = 0; j < C; j++) + { + status |= gr_abs(t, GR_MAT_ENTRY(mat, i, j, sz), ctx); + status |= gr_sqr(t, t, ctx); + status |= gr_add(res, res, t, ctx); + } + } + + status |= gr_sqrt(res, res, ctx); + + GR_TMP_CLEAR(t, ctx); + + return status; +} diff --git a/src/gr_mat/randtest.c b/src/gr_mat/randtest.c index ce30c13ad9..e4a820de4e 100644 --- a/src/gr_mat/randtest.c +++ b/src/gr_mat/randtest.c @@ -16,16 +16,23 @@ int gr_mat_randtest(gr_mat_t mat, flint_rand_t state, gr_ctx_t ctx) { - int status; - slong i, r, c; + int status = GR_SUCCESS; + slong i, j, r, c; + slong sz = ctx->sizeof_elem; r = gr_mat_nrows(mat, ctx); c = gr_mat_ncols(mat, ctx); - status = GR_SUCCESS; - for (i = 0; i < r; i++) + if (n_randint(state, 10) == 0) { - status |= _gr_vec_randtest(mat->rows[i], state, c, ctx); + for (i = 0; i < r; i++) + for (j = 0; j < c; j++) + status |= gr_randtest(GR_MAT_ENTRY(mat, i, j, sz), state, ctx); + } + else + { + for (i = 0; i < r; i++) + status |= _gr_vec_randtest(mat->rows[i], state, c, ctx); } return status; diff --git a/src/gr_mat/set_gr_mat_other.c b/src/gr_mat/set_gr_mat_other.c new file mode 100644 index 0000000000..a410ae3d8a --- /dev/null +++ b/src/gr_mat/set_gr_mat_other.c @@ -0,0 +1,34 @@ +/* + Copyright (C) 2022 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "gr_vec.h" +#include "gr_mat.h" + +int +gr_mat_set_gr_mat_other(gr_mat_t res, const gr_mat_t mat, gr_ctx_t mat_ctx, gr_ctx_t ctx) +{ + slong R, C, i, j; + slong sz = ctx->sizeof_elem; + slong mat_sz = mat_ctx->sizeof_elem; + int status = GR_SUCCESS; + + R = gr_mat_nrows(mat, mat_ctx); + C = gr_mat_ncols(mat, mat_ctx); + + if (R != gr_mat_nrows(res, ctx) || C != gr_mat_ncols(res, ctx)) + return GR_DOMAIN; + + for (i = 0; i < R; i++) + for (j = 0; j < C && status == GR_SUCCESS; j++) + status |= gr_set_other(GR_MAT_ENTRY(res, i, j, sz), GR_MAT_ENTRY(mat, i, j, mat_sz), mat_ctx, ctx); + + return status; +} \ No newline at end of file diff --git a/src/gr_mat/test_approx_mul.c b/src/gr_mat/test_approx_mul.c new file mode 100644 index 0000000000..e3c3c69424 --- /dev/null +++ b/src/gr_mat/test_approx_mul.c @@ -0,0 +1,234 @@ +/* + Copyright (C) 2022, 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you grn redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "gr.h" +#include "gr_mat.h" + +void gr_mat_test_approx_mul_max_norm(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx) +{ + slong iter; + gr_ctx_ptr given_ctx = ctx; + + for (iter = 0; iter < iters; iter++) + { + gr_mat_t A, B, C, D, ERR; + gr_ptr err, amag, bmag, tol; + slong a, b, c; + int status = GR_SUCCESS; + gr_ctx_t my_ctx; + gr_ctx_ptr ctx; + + if (given_ctx == NULL) + { + gr_ctx_init_random(my_ctx, state); + ctx = my_ctx; + } + else + ctx = given_ctx; + + if (n_randint(state, 4) == 0) + { + a = b = c = n_randint(state, maxn); + } + else + { + a = n_randint(state, maxn); + b = n_randint(state, maxn); + c = n_randint(state, maxn); + } + + gr_mat_init(A, a, b, ctx); + gr_mat_init(B, b, c, ctx); + gr_mat_init(C, a, c, ctx); + gr_mat_init(D, a, c, ctx); + gr_mat_init(ERR, a, c, ctx); + err = gr_heap_init(ctx); + amag = gr_heap_init(ctx); + bmag = gr_heap_init(ctx); + tol = gr_heap_init(ctx); + + status |= gr_mat_randtest(A, state, ctx); + status |= gr_mat_randtest(B, state, ctx); + status |= gr_mat_entrywise_unary_op(A, (gr_method_unary_op) gr_abs, A, ctx); + status |= gr_mat_entrywise_unary_op(B, (gr_method_unary_op) gr_abs, B, ctx); + + status |= gr_mat_randtest(C, state, ctx); + status |= gr_mat_randtest(D, state, ctx); + + if (b == c && n_randint(state, 2)) + { + status |= gr_mat_set(C, A, ctx); + status |= mul_impl(C, C, B, ctx); + } + else if (a == b && n_randint(state, 2)) + { + status |= gr_mat_set(C, B, ctx); + status |= mul_impl(C, A, C, ctx); + } + else if (a == b && b == c && n_randint(state, 2)) + { + status |= gr_mat_set(B, A, ctx); + status |= mul_impl(C, A, A, ctx); + } + else if (a == b && b == c && n_randint(state, 2)) + { + status |= gr_mat_set(B, A, ctx); + status |= gr_mat_set(C, A, ctx); + status |= mul_impl(C, C, C, ctx); + } + else + { + status |= mul_impl(C, A, B, ctx); + } + + status |= gr_mat_mul_classical(D, A, B, ctx); + + status |= gr_mat_sub(ERR, C, D, ctx); + + status |= gr_mat_norm_max(err, ERR, ctx); + status |= gr_mat_norm_max(amag, A, ctx); + status |= gr_mat_norm_max(bmag, B, ctx); + status |= gr_mul(tol, amag, bmag, ctx); + status |= gr_mul(tol, tol, rel_tol, ctx); + + if (status == GR_SUCCESS && gr_le(err, tol, ctx) == T_FALSE) + { + flint_printf("FAIL:\n"); + gr_ctx_println(ctx); + flint_printf("A:\n"); gr_mat_print(A, ctx); flint_printf("\n\n"); + flint_printf("B:\n"); gr_mat_print(B, ctx); flint_printf("\n\n"); + flint_printf("C:\n"); gr_mat_print(C, ctx); flint_printf("\n\n"); + flint_printf("D:\n"); gr_mat_print(D, ctx); flint_printf("\n\n"); + flint_printf("ERR:\n"); gr_mat_print(ERR, ctx); flint_printf("\n\n"); + flint_printf("err:\n"); gr_println(err, ctx); flint_printf("\n\n"); + flint_printf("tol:\n"); gr_println(tol, ctx); flint_printf("\n\n"); + flint_abort(); + } + + gr_mat_clear(A, ctx); + gr_mat_clear(B, ctx); + gr_mat_clear(C, ctx); + gr_mat_clear(D, ctx); + gr_heap_clear(err, ctx); + gr_heap_clear(amag, ctx); + gr_heap_clear(bmag, ctx); + gr_heap_clear(tol, ctx); + + if (given_ctx == NULL) + gr_ctx_clear(ctx); + } +} + +void gr_mat_test_approx_mul_pos_entrywise_accurate(gr_method_mat_binary_op mul_impl, gr_srcptr rel_tol, flint_rand_t state, slong iters, slong maxn, gr_ctx_t ctx) +{ + slong iter; + gr_ctx_ptr given_ctx = ctx; + + for (iter = 0; iter < iters; iter++) + { + gr_mat_t A, B, C, D, ERR, TOL; + slong a, b, c; + int status = GR_SUCCESS; + gr_ctx_t my_ctx; + gr_ctx_ptr ctx; + + if (given_ctx == NULL) + { + gr_ctx_init_random(my_ctx, state); + ctx = my_ctx; + } + else + ctx = given_ctx; + + if (n_randint(state, 4) == 0) + { + a = b = c = n_randint(state, maxn); + } + else + { + a = n_randint(state, maxn); + b = n_randint(state, maxn); + c = n_randint(state, maxn); + } + + gr_mat_init(A, a, b, ctx); + gr_mat_init(B, b, c, ctx); + gr_mat_init(C, a, c, ctx); + gr_mat_init(D, a, c, ctx); + gr_mat_init(ERR, a, c, ctx); + gr_mat_init(TOL, a, c, ctx); + + status |= gr_mat_randtest(A, state, ctx); + status |= gr_mat_randtest(B, state, ctx); + status |= gr_mat_entrywise_unary_op(A, (gr_method_unary_op) gr_abs, A, ctx); + status |= gr_mat_entrywise_unary_op(B, (gr_method_unary_op) gr_abs, B, ctx); + + status |= gr_mat_randtest(C, state, ctx); + status |= gr_mat_randtest(D, state, ctx); + + if (b == c && n_randint(state, 2)) + { + status |= gr_mat_set(C, A, ctx); + status |= mul_impl(C, C, B, ctx); + } + else if (a == b && n_randint(state, 2)) + { + status |= gr_mat_set(C, B, ctx); + status |= mul_impl(C, A, C, ctx); + } + else if (a == b && b == c && n_randint(state, 2)) + { + status |= gr_mat_set(B, A, ctx); + status |= mul_impl(C, A, A, ctx); + } + else if (a == b && b == c && n_randint(state, 2)) + { + status |= gr_mat_set(B, A, ctx); + status |= gr_mat_set(C, A, ctx); + status |= mul_impl(C, C, C, ctx); + } + else + { + status |= mul_impl(C, A, B, ctx); + } + + status |= gr_mat_mul_classical(D, A, B, ctx); + + /* |C-D| <= |D| tol */ + status |= gr_mat_sub(ERR, C, D, ctx); + status |= gr_mat_entrywise_unary_op(ERR, (gr_method_unary_op) gr_abs, ERR, ctx); + status |= gr_mat_entrywise_unary_op(TOL, (gr_method_unary_op) gr_abs, D, ctx); + status |= gr_mat_mul_scalar(TOL, TOL, rel_tol, ctx); + + if (status == GR_SUCCESS && gr_mat_entrywise_binary_predicate_all((gr_method_binary_predicate) gr_le, ERR, TOL, ctx) == T_FALSE) + { + flint_printf("FAIL:\n"); + gr_ctx_println(ctx); + flint_printf("A:\n"); gr_mat_print(A, ctx); flint_printf("\n\n"); + flint_printf("B:\n"); gr_mat_print(B, ctx); flint_printf("\n\n"); + flint_printf("C:\n"); gr_mat_print(C, ctx); flint_printf("\n\n"); + flint_printf("D:\n"); gr_mat_print(D, ctx); flint_printf("\n\n"); + flint_printf("ERR:\n"); gr_mat_print(ERR, ctx); flint_printf("\n\n"); + flint_printf("TOL:\n"); gr_mat_print(TOL, ctx); flint_printf("\n\n"); + flint_abort(); + } + + gr_mat_clear(A, ctx); + gr_mat_clear(B, ctx); + gr_mat_clear(C, ctx); + gr_mat_clear(D, ctx); + gr_mat_clear(ERR, ctx); + gr_mat_clear(TOL, ctx); + + if (given_ctx == NULL) + gr_ctx_clear(ctx); + } +} diff --git a/src/nfloat.h b/src/nfloat.h index 45a16d3eee..78b764a213 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -453,6 +453,11 @@ int _nfloat_vec_submul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); +int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); +int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); +int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx); +int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); + /* Complex numbers */ /* Note: we use the same context data for real and complex rings (only which_ring and sizeof_elem differ). This allows us to call diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c index 250f15bf8d..6178b877cc 100644 --- a/src/nfloat/ctx.c +++ b/src/nfloat/ctx.c @@ -173,8 +173,8 @@ gr_method_tab_input _nfloat_methods_input[] = /* {GR_METHOD_POLY_MULLOW, (gr_funcptr) nfloat_poly_mullow}, {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_poly_roots_other}, - {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_mat_mul}, */ + {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_mat_mul}, {GR_METHOD_MAT_DET, (gr_funcptr) gr_mat_det_generic_field}, {GR_METHOD_MAT_FIND_NONZERO_PIVOT, (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs}, diff --git a/src/nfloat/mat_mul.c b/src/nfloat/mat_mul.c new file mode 100644 index 0000000000..169722a3ec --- /dev/null +++ b/src/nfloat/mat_mul.c @@ -0,0 +1,931 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "mpn_extras.h" +#include "gr.h" +#include "gr_mat.h" +#include "gr_generic.h" +#include "acf.h" +#include "acb.h" +#include "nfloat.h" + +#include "gr.h" +#include "nfloat.h" +#include "gr_vec.h" +#include "gr_mat.h" +#include "gr_special.h" +#include "fmpz_mat.h" + + +int +nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); + + +/* For printing */ +#include "arf.h" + +/* Arithmetic on fixed-point numbers in (-1,1) */ +/* x[0] stores the sign bit, x[1], ..., x[n] store the absolute value */ + +void +nfixed_print(nn_srcptr x, slong nlimbs, slong exp) +{ + arf_t t; + arf_init(t); + _arf_set_mpn_fixed(t, x + 1, nlimbs, nlimbs, x[0], nlimbs * FLINT_BITS, ARF_RND_DOWN); + arf_mul_2exp_si(t, t, exp); + arf_printd(t, nlimbs * FLINT_BITS / 3.321928 + 1); + arf_clear(t); +} + + +/* todo: don't do this */ +#define NFIXED_MAX_NLIMBS (2 * NFLOAT_MAX_LIMBS) + +FLINT_FORCE_INLINE +void nfixed_add(nn_ptr res, nn_srcptr a, nn_srcptr b, slong nlimbs) +{ + int asgn, bsgn; + asgn = a[0]; + bsgn = b[0]; + + if (asgn == bsgn) + { + res[0] = asgn; + mpn_add_n(res + 1, a + 1, b + 1, nlimbs); + } + else + { + res[0] = asgn ^ flint_mpn_signed_sub_n(res + 1, a + 1, b + 1, nlimbs); + } +} + +FLINT_FORCE_INLINE +void nfixed_sub(nn_ptr res, nn_srcptr a, nn_srcptr b, slong nlimbs) +{ + int asgn, bsgn; + asgn = a[0]; + bsgn = b[0]; + + if (asgn != bsgn) + { + res[0] = asgn; + mpn_add_n(res + 1, a + 1, b + 1, nlimbs); + } + else + { + res[0] = asgn ^ flint_mpn_signed_sub_n(res + 1, a + 1, b + 1, nlimbs); + } +} + +FLINT_FORCE_INLINE +void nfixed_mul(nn_ptr res, nn_srcptr a, nn_srcptr b, slong nlimbs) +{ + int asgn, bsgn; + asgn = a[0]; + bsgn = b[0]; + res[0] = asgn ^ bsgn; + flint_mpn_mulhigh_n(res + 1, a + 1, b + 1, nlimbs); +} + +FLINT_FORCE_INLINE +void nfixed_sqr(nn_ptr res, nn_srcptr a, slong nlimbs) +{ + res[0] = 0; + flint_mpn_sqrhigh(res + 1, a + 1, nlimbs); +} + +FLINT_FORCE_INLINE +void nfixed_div2(nn_ptr res, nn_srcptr a, slong nlimbs) +{ + res[0] = a[0]; + mpn_rshift(res + 1, a + 1, nlimbs, 1); +} + +/* A is (m x n), B is (n x p), C is (m x p) */ +void +_nfixed_mat_mul_classical(nn_ptr C, nn_srcptr A, nn_srcptr B, slong m, slong n, slong p, slong nlimbs) +{ + slong i, j, k; + nn_ptr t; + TMP_INIT; + + TMP_START; + + t = TMP_ALLOC((nlimbs + 1) * sizeof(ulong)); + +#define A_ENTRY(i, j) ((A) + ((i) * n + (j)) * (nlimbs + 1)) +#define B_ENTRY(i, j) ((B) + ((i) * p + (j)) * (nlimbs + 1)) +#define C_ENTRY(i, j) ((C) + ((i) * p + (j)) * (nlimbs + 1)) + + for (i = 0; i < m; i++) + { + for (j = 0; j < p; j++) + { + nfixed_mul(C_ENTRY(i, j), A_ENTRY(i, 0), B_ENTRY(0, j), nlimbs); + + for (k = 1; k < n; k++) + { + nfixed_mul(t, A_ENTRY(i, k), B_ENTRY(k, j), nlimbs); + nfixed_add(C_ENTRY(i, j), C_ENTRY(i, j), t, nlimbs); + } + } + } + + TMP_END; + +#undef A_ENTRY +#undef B_ENTRY +#undef C_ENTRY +} + +/* compute c += (a1 + b1) * (a2 + b2) */ +/* val0, val1, val2 are scratch space */ +FLINT_FORCE_INLINE void +addmul_addadd(nn_ptr val0, nn_ptr val1, nn_ptr val2, nn_ptr c, nn_srcptr a1, nn_srcptr b1, nn_srcptr a2, nn_srcptr b2, slong nlimbs) +{ + nfixed_add(val1, a1, b1, nlimbs); + nfixed_add(val2, a2, b2, nlimbs); + nfixed_mul(val0, val1, val2, nlimbs); + nfixed_add(c, c, val0, nlimbs); +} + +/* compute c += (a1 - b1) * (a2 - b2) */ +/* val0, val1, val2 are scratch space */ +FLINT_FORCE_INLINE void +addmul_subsub(nn_ptr val0, nn_ptr val1, nn_ptr val2, nn_ptr c, nn_srcptr a1, nn_srcptr b1, nn_srcptr a2, nn_srcptr b2, slong nlimbs) +{ + nfixed_sub(val1, a1, b1, nlimbs); + nfixed_sub(val2, a2, b2, nlimbs); + nfixed_mul(val0, val1, val2, nlimbs); + nfixed_add(c, c, val0, nlimbs); +} + +void +_nfixed_mat_mul_waksman(nn_ptr C, nn_srcptr A, nn_srcptr B, slong m, slong n, slong p, slong nlimbs) +{ + slong l, j, k; + + nn_ptr Ctmp = flint_calloc((nlimbs + 1) * ((p + m) + 5), sizeof(ulong)); + /* Ctmp itself has m * p entries */ + nn_ptr Crow = Ctmp; /* Crow has p entries */ + nn_ptr Ccol = Crow + (nlimbs + 1) * p; /* Ccol has m entries */ + nn_ptr val0 = Ccol + (nlimbs + 1) * m; /* val0 has room for 2 sums */ + nn_ptr val1 = val0 + (nlimbs + 1) * 2; /* val1 has room for 1 sum */ + nn_ptr val2 = val1 + (nlimbs + 1); /* val2 has room for 1 sum */ + nn_ptr crow = val2 + (nlimbs + 1); /* crow has room for 1 sum */ + +#define A_ENTRY(i, j) ((A) + ((i) * n + (j)) * (nlimbs + 1)) +#define B_ENTRY(i, j) ((B) + ((i) * p + (j)) * (nlimbs + 1)) +#define C_ENTRY(i, j) ((C) + ((i) * p + (j)) * (nlimbs + 1)) + +#define Crow_ENTRY(ii) (Crow + (ii) * (nlimbs + 1)) +#define Ccol_ENTRY(ii) (Ccol + (ii) * (nlimbs + 1)) + + slong np = n >> 1; + + for (j = 1; j <= np; j++) + { + slong j2 = (j << 1) - 1; + + for (k = 0; k < p; k++) + { + addmul_addadd(val0, val1, val2, C_ENTRY(0, k), A_ENTRY(0, j2-1), B_ENTRY(j2, k), A_ENTRY(0, j2), B_ENTRY(j2-1, k), nlimbs); + addmul_subsub(val0, val1, val2, Crow_ENTRY(k), A_ENTRY(0, j2-1), B_ENTRY(j2, k), A_ENTRY(0, j2), B_ENTRY(j2-1, k), nlimbs); + } + + for (l = 1; l < m; l++) + { + addmul_addadd(val0, val1, val2, C_ENTRY(l, 0), A_ENTRY(l, j2-1), B_ENTRY(j2, 0), A_ENTRY(l, j2), B_ENTRY(j2-1, 0), nlimbs); + addmul_subsub(val0, val1, val2, Ccol_ENTRY(l), A_ENTRY(l, j2-1), B_ENTRY(j2, 0), A_ENTRY(l, j2), B_ENTRY(j2-1, 0), nlimbs); + } + + for (k = 1; k < p; k++) + { + for (l = 1; l < m; l++) + { + addmul_addadd(val0, val1, val2, C_ENTRY(l, k), A_ENTRY(l, j2-1), B_ENTRY(j2, k), A_ENTRY(l, j2), B_ENTRY(j2-1, k), nlimbs); + } + } + } + + for (l = 1; l < m; l++) + { + nfixed_add(val1, Ccol_ENTRY(l), C_ENTRY(l, 0), nlimbs); + nfixed_div2(Ccol_ENTRY(l), val1, nlimbs); + nfixed_sub(C_ENTRY(l, 0), C_ENTRY(l, 0), Ccol_ENTRY(l), nlimbs); + } + + nfixed_add(val1, Crow, C_ENTRY(0, 0), nlimbs); + nfixed_div2(val0, val1, nlimbs); + nfixed_sub(C_ENTRY(0, 0), C_ENTRY(0, 0), val0, nlimbs); + + for (k = 1; k < p; k++) + { + nfixed_add(crow, Crow_ENTRY(k), C_ENTRY(0, k), nlimbs); + nfixed_div2(val1, crow, nlimbs); + nfixed_sub(C_ENTRY(0, k), C_ENTRY(0, k), val1, nlimbs); + nfixed_sub(crow, val1, val0, nlimbs); + + for (l = 1; l < m; l++) + { + nfixed_sub(val2, C_ENTRY(l, k), crow, nlimbs); + nfixed_sub(C_ENTRY(l, k), val2, Ccol_ENTRY(l), nlimbs); + } + } + + if ((n & 1) == 1) + { + for (l = 0; l < m; l++) + { + for (k = 0; k < p; k++) + { + nfixed_mul(val0, A_ENTRY(l, n-1), B_ENTRY(n-1, k), nlimbs); + nfixed_add(C_ENTRY(l, k), C_ENTRY(l, k), val0, nlimbs); + } + } + } + + flint_free(Ctmp); + +#undef A_ENTRY +#undef B_ENTRY +#undef C_ENTRY +} + +FLINT_FORCE_INLINE void +_nfloat_get_nfixed(nn_ptr res, nn_srcptr x, slong exp, slong fix_nlimbs, gr_ctx_t ctx) +{ + slong rel_exp; + + /* assumes res is already zeroed */ + if (NFLOAT_IS_ZERO(x)) + return; + + rel_exp = NFLOAT_EXP(x) - exp; + if (rel_exp >= 0) + flint_abort(); + + res[0] = NFLOAT_SGNBIT(x); + _arf_get_integer_mpn(res + 1, NFLOAT_D(x), NFLOAT_CTX_NLIMBS(ctx), fix_nlimbs * FLINT_BITS + rel_exp); +} + +FLINT_FORCE_INLINE int +_nfloat_set_nfixed(nn_ptr res, nn_srcptr x, slong exp, slong fix_nlimbs, gr_ctx_t ctx) +{ + return nfloat_set_mpn_2exp(res, x + 1, fix_nlimbs, exp, x[0], ctx); +} + +static void +_nfloat_mat_exp_range(slong * _Amin, slong * _Amax, const gr_mat_t A, gr_ctx_t ctx) +{ + slong Amax, Amin; + slong m = A->r; + slong n = A->c; + slong exp, i, j; + slong sz = ctx->sizeof_elem; + + Amax = WORD_MIN; + Amin = WORD_MAX; + + for (i = 0; i < m; i++) + { + for (j = 0; j < n; j++) + { + exp = NFLOAT_EXP(GR_MAT_ENTRY(A, i, j, sz)); + Amax = FLINT_MAX(Amax, exp); + Amin = FLINT_MIN(Amin, exp); + } + } + + _Amin[0] = Amin; + _Amax[0] = Amax; +} + +int +_nfloat_mat_mul_fixed_given_exp(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong Aexp, slong Bexp, slong fnlimbs, int waksman, gr_ctx_t ctx) +{ + nn_ptr T, TA, TB, TC; + slong i, j; + slong sz = ctx->sizeof_elem; + slong fdnlimbs; + + slong m = A->r; + slong n = A->c; + slong p = B->c; + + /* limbs including sign limb */ + fdnlimbs = fnlimbs + 1; + + T = flint_calloc(fdnlimbs * (m * n + n * p + m * p), sizeof(ulong)); + + TA = T; + TB = TA + fdnlimbs * (m * n); + TC = TB + fdnlimbs * (n * p); + + for (i = 0; i < m; i++) + for (j = 0; j < n; j++) + _nfloat_get_nfixed(TA + i * fdnlimbs * n + j * fdnlimbs, GR_MAT_ENTRY(A, i, j, sz), Aexp, fnlimbs, ctx); + + for (i = 0; i < n; i++) + for (j = 0; j < p; j++) + _nfloat_get_nfixed(TB + i * fdnlimbs * p + j * fdnlimbs, GR_MAT_ENTRY(B, i, j, sz), Bexp, fnlimbs, ctx); + + if (waksman) + _nfixed_mat_mul_waksman(TC, TA, TB, m, n, p, fnlimbs); + else + _nfixed_mat_mul_classical(TC, TA, TB, m, n, p, fnlimbs); + + for (i = 0; i < m; i++) + for (j = 0; j < p; j++) + _nfloat_set_nfixed(GR_MAT_ENTRY(C, i, j, sz), TC + i * fdnlimbs * p + j * fdnlimbs, Aexp + Bexp, fnlimbs, ctx); + + flint_free(T); + + return GR_SUCCESS; +} + +int +_nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksman, slong max_extra_bits, gr_ctx_t ctx) +{ + slong Amax, Amin, Bmax, Bmin, Adelta, Bdelta, Aexp, Bexp; + slong prec; + slong pad_top, pad_bot, extra_bits, fbits, fnlimbs; + slong n = A->c; + + if (NFLOAT_CTX_HAS_INF_NAN(ctx)) + return GR_UNABLE; + + prec = NFLOAT_CTX_PREC(ctx); + + _nfloat_mat_exp_range(&Amin, &Amax, A, ctx); + _nfloat_mat_exp_range(&Bmin, &Bmax, B, ctx); + + if (Amax < NFLOAT_MIN_EXP || Bmax < NFLOAT_MIN_EXP) + return gr_mat_zero(C, ctx); + + /* Currently, we don't handle zeros. (They pose no problem, but zero entries in + the output may not be exact. To be done.) */ + if (Amin < NFLOAT_MIN_EXP || Bmin < NFLOAT_MIN_EXP) + return gr_mat_mul_classical(C, A, B, ctx); + + Adelta = Amax - Amin; + Bdelta = Bmax - Bmin; + + /* sanity check */ + if (Adelta > 10 * prec || Bdelta > 10 * prec) + return gr_mat_mul_classical(C, A, B, ctx); + + /* + To double check: for Waksman, + * The intermediate entries are bounded by 8n max(|A|,|B|)^2. + * The error, including error from converting + the input matrices, is bounded by 8n ulps. + */ + + pad_top = 3 + FLINT_BIT_COUNT(n); + pad_bot = 3 + FLINT_BIT_COUNT(n); + + extra_bits = Adelta + Bdelta + pad_top + pad_bot; + + if (extra_bits >= max_extra_bits) + return gr_mat_mul_classical(C, A, B, ctx); + + Aexp = Amax + pad_top; + Bexp = Bmax + pad_top; + fbits = prec + extra_bits; + fnlimbs = (fbits + FLINT_BITS - 1) / FLINT_BITS; + + return _nfloat_mat_mul_fixed_given_exp(C, A, B, Aexp, Bexp, fnlimbs, waksman, ctx); +} + +int +nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) +{ + return _nfloat_mat_mul_fixed(C, A, B, 0, 100000, ctx); +} + +int +nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) +{ + return _nfloat_mat_mul_fixed(C, A, B, 1, 100000, ctx); +} + + +static void +_nfloat_2exp_get_fmpz(fmpz_t res, nfloat_srcptr x, slong fixexp, gr_ctx_t ctx) +{ + slong exp, zn; + mpz_ptr zz; + nn_ptr zp; + int negative; + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + if (NFLOAT_IS_SPECIAL(x)) + { + fmpz_zero(res); + return; + } + + exp = NFLOAT_EXP(x) - fixexp; + + if (exp <= 0) + { + fmpz_zero(res); + return; + } + + /* todo: small case */ + + negative = NFLOAT_SGNBIT(x); + + zn = (exp + FLINT_BITS - 1) / FLINT_BITS; + zz = _fmpz_promote(res); + zp = FLINT_MPZ_REALLOC(zz, zn); + _arf_get_integer_mpn(zp, NFLOAT_D(x), nlimbs, exp); + zz->_mp_size = negative ? -zn : zn; + _fmpz_demote_val(res); +} + +int +nfloat_mat_addmul_block_fallback(gr_mat_t C, + const gr_mat_t A, const gr_mat_t B, + slong block_start, + slong block_end, + gr_ctx_t ctx) +{ + slong M, P, n; + slong i, j, sz; + nn_ptr tmpB; + slong ndlimbs = NFLOAT_CTX_DATA_NLIMBS(ctx); + sz = ctx->sizeof_elem; + int status = GR_SUCCESS; + + M = A->r; + P = B->c; + + n = block_end - block_start; + + tmpB = flint_malloc(sizeof(ulong) * ndlimbs * (P * n)); + +#define AA(ii, jj) GR_MAT_ENTRY(A, ii, block_start + (jj), sz) + + for (i = 0; i < P; i++) + for (j = 0; j < n; j++) + flint_mpn_copyi(tmpB + (i * n + j) * ndlimbs, GR_MAT_ENTRY(B, block_start + j, i, sz), ndlimbs); + + for (i = 0; i < M; i++) + { + for (j = 0; j < P; j++) + { + status |= _nfloat_vec_dot(GR_MAT_ENTRY(C, i, j, sz), + (block_start == 0) ? NULL : GR_MAT_ENTRY(C, i, j, sz), 0, + GR_MAT_ENTRY(A, i, block_start, sz), + tmpB + j * n * ndlimbs, n, ctx); + } + } + + flint_free(tmpB); + + return status; +} + +int +nfloat_mat_addmul_block_prescaled(gr_mat_t C, + const gr_mat_t A, const gr_mat_t B, + slong block_start, + slong block_end, + const slong * A_min, /* A per-row bottom exponent */ + const slong * B_min, /* B per-row bottom exponent */ + gr_ctx_t ctx) +{ + slong M, P, n; + slong i, j; + slong M0, M1, P0, P1, Mstep, Pstep; + int status = GR_SUCCESS; + slong sz = ctx->sizeof_elem; + ulong t[NFLOAT_MAX_ALLOC]; + slong e; + + M = A->r; + P = B->c; + + n = block_end - block_start; + + /* Create sub-blocks to keep matrices nearly square. Necessary? */ +#if 1 + Mstep = (M < 2 * n) ? M : n; + Pstep = (P < 2 * n) ? P : n; +#else + Mstep = M; + Pstep = P; +#endif + + for (M0 = 0; M0 < M; M0 += Mstep) + { + for (P0 = 0; P0 < P; P0 += Pstep) + { + fmpz_mat_t AA, BB, CC; + + M1 = FLINT_MIN(M0 + Mstep, M); + P1 = FLINT_MIN(P0 + Pstep, P); + + fmpz_mat_init(AA, M1 - M0, n); + fmpz_mat_init(BB, n, P1 - P0); + fmpz_mat_init(CC, M1 - M0, P1 - P0); + + /* Convert to fixed-point matrices. */ + for (i = M0; i < M1; i++) + { + if (A_min[i] == WORD_MIN) /* only zeros in this row */ + continue; + + for (j = 0; j < n; j++) + _nfloat_2exp_get_fmpz(fmpz_mat_entry(AA, i - M0, j), GR_MAT_ENTRY(A, i, block_start + j, sz), A_min[i], ctx); + } + + for (i = P0; i < P1; i++) + { + if (B_min[i] == WORD_MIN) /* only zeros in this column */ + continue; + + for (j = 0; j < n; j++) + _nfloat_2exp_get_fmpz(fmpz_mat_entry(BB, j, i - P0), GR_MAT_ENTRY(B, block_start + j, i, sz), B_min[i], ctx); + } + + /* The main multiplication */ + fmpz_mat_mul(CC, AA, BB); + fmpz_mat_clear(AA); + fmpz_mat_clear(BB); + + /* Add to the result matrix */ + for (i = M0; i < M1; i++) + { + for (j = P0; j < P1; j++) + { + e = A_min[i] + B_min[j]; + + /* The first time we write this Cij */ + if (block_start == 0) + { + status |= nfloat_set_fmpz(GR_MAT_ENTRY(C, i, j, sz), fmpz_mat_entry(CC, i - M0, j - P0), ctx); + status |= nfloat_mul_2exp_si(GR_MAT_ENTRY(C, i, j, sz), GR_MAT_ENTRY(C, i, j, sz), e, ctx); + } + else + { + status |= nfloat_set_fmpz(t, fmpz_mat_entry(CC, i - M0, j - P0), ctx); + status |= nfloat_mul_2exp_si(t, t, e, ctx); + status |= nfloat_add(GR_MAT_ENTRY(C, i, j, sz), GR_MAT_ENTRY(C, i, j, sz), t, ctx); + } + } + } + + fmpz_mat_clear(CC); + } + } + + return status; +} + +FLINT_FORCE_INLINE slong +_nfloat_nbits(nfloat_srcptr x, slong nlimbs) +{ + nn_srcptr ad; + slong bits; + + ad = NFLOAT_D(x); + bits = FLINT_BITS * nlimbs; + + while (ad[0] == 0) + { + bits -= FLINT_BITS; + ad++; + } + + bits -= flint_ctz(ad[0]); + + return bits; +} + +/* todo: squaring optimizations */ +int +nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx) +{ + slong M, N, P; + slong *A_min, *A_max, *B_min, *B_max; + short *A_bits, *B_bits; + slong *A_bot, *B_bot; + slong block_start, block_end, i, j, bot, top, max_height; + slong b, A_max_bits, B_max_bits; + nfloat_srcptr t; + double A_density, B_density; + slong sz = ctx->sizeof_elem; + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); + slong prec = NFLOAT_CTX_PREC(ctx); + int status = GR_SUCCESS; + + M = A->r; + N = A->c; + P = B->r; + + if (N != B->r || M != C->r || P != C->c) + return GR_DOMAIN; + + if (M == 0 || N == 0 || P == 0) + return gr_mat_zero(C, ctx); + + if (NFLOAT_CTX_HAS_INF_NAN(ctx)) + return GR_UNABLE; + + if (A == C || B == C) + { + gr_mat_t T; + gr_mat_init(T, M, P, ctx); + status = nfloat_mat_mul_block(T, A, B, min_block_size, ctx); + status |= gr_mat_swap_entrywise(T, C, ctx); + gr_mat_clear(T, ctx); + return status; + } + + /* bottom exponents of A */ + A_bot = flint_malloc(sizeof(slong) * M * N); + /* minimum bottom exponent in current row */ + A_min = flint_malloc(sizeof(slong) * M); + /* maximum top exponent in current row */ + A_max = flint_malloc(sizeof(slong) * M); + + B_bot = flint_malloc(sizeof(slong) * N * P); + B_min = flint_malloc(sizeof(slong) * P); + B_max = flint_malloc(sizeof(slong) * P); + + /* save space using shorts to store the bit sizes temporarily; + the block algorithm will not be used at extremely high precision */ + A_bits = flint_malloc(sizeof(short) * M * N); + B_bits = flint_malloc(sizeof(short) * N * P); + + A_max_bits = B_max_bits = 0; + A_density = B_density = 0; + + /* Build table of bottom exponents (WORD_MIN signifies a zero), + and also collect some statistics. */ + for (i = 0; i < M; i++) + { + for (j = 0; j < N; j++) + { + t = GR_MAT_ENTRY(A, i, j, sz); + if (NFLOAT_IS_ZERO(t)) + { + A_bot[i * N + j] = WORD_MIN; + A_bits[i * N + j] = 0; + } + else + { + b = _nfloat_nbits(t, nlimbs); + A_bot[i * N + j] = NFLOAT_EXP(t) - b; + A_bits[i * N + j] = b; + A_max_bits = FLINT_MAX(A_max_bits, b); + A_density++; + } + } + } + + for (i = 0; i < N; i++) + { + for (j = 0; j < P; j++) + { + t = GR_MAT_ENTRY(B, i, j, sz); + if (NFLOAT_IS_ZERO(t)) + { + B_bot[i * P + j] = WORD_MIN; + B_bits[i * P + j] = 0; + } + else + { + b = _nfloat_nbits(t, nlimbs); + B_bot[i * P + j] = NFLOAT_EXP(t) - b; + B_bits[i * P + j] = b; + B_max_bits = FLINT_MAX(B_max_bits, b); + B_density++; + } + } + } + + A_density = A_density / (M * N); + B_density = B_density / (N * P); + + /* Don't shift too far when creating integer block matrices. */ + max_height = 1.25 * FLINT_MIN(prec, FLINT_MAX(A_max_bits, B_max_bits)) + 192; + + /* FIXME: this condition is bogus */ + if (A_density < 0.1 && B_density < 0.1 && max_height > 1024) + { + status = gr_mat_mul_classical(C, A, B, ctx); + goto cleanup; + } + + block_start = 0; + while (block_start < N) + { + /* Find a run of columns of A and rows of B such that the + bottom exponents differ by at most max_height. */ + + block_end = block_start + 1; /* index is exclusive block_end */ + + /* begin with this column of A and row of B */ + for (i = 0; i < M; i++) + { + A_max[i] = A_min[i] = A_bot[i * N + block_start]; + A_max[i] += (slong) A_bits[i * N + block_start]; + } + + for (i = 0; i < P; i++) + { + B_max[i] = B_min[i] = B_bot[block_start * P + i]; + B_max[i] += (slong) B_bits[block_start * P + i]; + } + + while (block_end < N) + { + double size; + + /* End block if memory would be excessive. */ + /* Necessary? */ + /* Should also do initial check above, if C alone is too large. */ + size = (block_end - block_start) * M * (double) A_max_bits; + size += (block_end - block_start) * P * (double) B_max_bits; + size += (M * P) * (double) (A_max_bits + B_max_bits); + size /= 8.0; + if (size > 2e9) + goto blocks_built; + + /* check if we can extend with column [block_end] of A */ + for (i = 0; i < M; i++) + { + bot = A_bot[i * N + block_end]; + /* zeros are irrelevant */ + if (bot == WORD_MIN || A_max[i] == WORD_MIN) + continue; + top = bot + (slong) A_bits[i * N + block_end]; + /* jump will be too big */ + if (top > A_min[i] + max_height || bot < A_max[i] - max_height) + goto blocks_built; + } + + /* check if we can extend with row [block_end] of B */ + for (i = 0; i < P; i++) + { + bot = B_bot[block_end * P + i]; + if (bot == WORD_MIN || B_max[i] == WORD_MIN) + continue; + top = bot + (slong) B_bits[block_end * P + i]; + if (top > B_min[i] + max_height || bot < B_max[i] - max_height) + goto blocks_built; + } + + /* second pass to update the extreme values */ + for (i = 0; i < M; i++) + { + bot = A_bot[i * N + block_end]; + top = bot + (slong) A_bits[i * N + block_end]; + if (A_max[i] == WORD_MIN) + { + A_max[i] = top; + A_min[i] = bot; + } + else if (bot != WORD_MIN) + { + if (bot < A_min[i]) A_min[i] = bot; + if (top > A_max[i]) A_max[i] = top; + } + } + + for (i = 0; i < P; i++) + { + bot = B_bot[block_end * P + i]; + top = bot + (slong) B_bits[block_end * P + i]; + if (B_max[i] == WORD_MIN) + { + B_max[i] = top; + B_min[i] = bot; + } + else if (bot != WORD_MIN) + { + if (bot < B_min[i]) B_min[i] = bot; + if (top > B_max[i]) B_max[i] = top; + } + } + + block_end++; + } + + blocks_built: + if (block_end - block_start < min_block_size) + { + block_end = FLINT_MIN(N, block_start + min_block_size); + status |= nfloat_mat_addmul_block_fallback(C, A, B, block_start, block_end, ctx); + } + else + { + status |= nfloat_mat_addmul_block_prescaled(C, A, B, block_start, block_end, A_min, B_min, ctx); + } + + block_start = block_end; + } + +cleanup: + flint_free(A_bot); + flint_free(A_max); + flint_free(A_min); + flint_free(B_bot); + flint_free(B_max); + flint_free(B_min); + flint_free(A_bits); + flint_free(B_bits); + + return status; +} + +/* Minimum precision for using fixed-point arithmetic */ + +/* TODO: for *unsigned* matrices, there is a speedup already for + prec = 192. Consider inlining fixed-point additions/subtractions for + 4 and 5 limbs to extend this to general matrices. */ +/* #define NFLOAT_MAT_MUL_FIXED_CUTOFF 192 */ +#define NFLOAT_MAT_MUL_FIXED_CUTOFF 320 + +/* first cutoff: classical -> fixed_classical + second cutoff: fixed_classical -> waksman */ +static const int nfloat_mat_mul_cutoff_tab[][2] = { + {0, 0}, /* prec = 0 */ + {0, 0}, /* prec = 64 */ + {0, 0}, /* prec = 128 */ + {32, 32}, /* prec = 192 */ + {8, 20}, /* prec = 256 */ + {4, 15}, /* prec = 320 */ + {3, 10}, /* prec = 384 */ + {3, 10}, /* prec = 448 */ + {3, 8}, /* prec = 512 */ + {10, 10}, /* prec = 576 */ + {4, 5}, /* prec = 640 */ +}; + +/* {4, 4} from this point */ +#define NFLOAT_MAT_MUL_CUTOFF_4 704 +/* {3, 3} from this point */ +#define NFLOAT_MAT_MUL_CUTOFF_3 1600 + +int +nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) +{ + slong cutoff1, cutoff2, dim; + int use_waksman = 0; + slong prec; + slong max_extra_prec; + + slong m = A->r; + slong n = A->c; + slong p = B->c; + + dim = FLINT_MIN(n, FLINT_MIN(m, p)); + + if (dim <= 2 || NFLOAT_CTX_HAS_INF_NAN(ctx)) + return gr_mat_mul_classical(C, A, B, ctx); + + if (dim <= 80) + { + prec = NFLOAT_CTX_PREC(ctx); + + if (prec < NFLOAT_MAT_MUL_FIXED_CUTOFF) + return gr_mat_mul_classical(C, A, B, ctx); + + if (prec >= NFLOAT_MAT_MUL_CUTOFF_3) + cutoff1 = cutoff2 = 3; + else if (prec >= NFLOAT_MAT_MUL_CUTOFF_4) + cutoff1 = cutoff2 = 4; + else + { + cutoff1 = nfloat_mat_mul_cutoff_tab[prec / 64][0]; + cutoff2 = nfloat_mat_mul_cutoff_tab[prec / 64][1]; + } + + if (dim < cutoff1) + return gr_mat_mul_classical(C, A, B, ctx); + + use_waksman = (dim >= cutoff2); + max_extra_prec = (prec < 768) ? 64 : prec / 4; + + return _nfloat_mat_mul_fixed(C, A, B, use_waksman, max_extra_prec, ctx); + } + else + { + return nfloat_mat_mul_block(C, A, B, 70, ctx); + } +} diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index ccf0293288..10cb533916 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -98,7 +98,7 @@ nfloat_randtest(nfloat_ptr res, flint_rand_t state, gr_ctx_t ctx) int status; arf_init(t); - arf_randtest(t, state, NFLOAT_CTX_PREC(ctx), 10); + arf_randtest(t, state, NFLOAT_CTX_PREC(ctx), n_randint(state, 2) ? 2 : 10); status = nfloat_set_arf(res, t, ctx); arf_clear(t); return status; diff --git a/src/nfloat/test/main.c b/src/nfloat/test/main.c index 0a129221f1..8520994f89 100644 --- a/src/nfloat/test/main.c +++ b/src/nfloat/test/main.c @@ -13,6 +13,7 @@ #include "t-add_sub_n.c" #include "t-addmul_submul.c" +#include "t-mat_mul.c" #include "t-nfloat.c" #include "t-nfloat_complex.c" @@ -22,6 +23,7 @@ test_struct tests[] = { TEST_FUNCTION(add_sub_n), TEST_FUNCTION(addmul_submul), + TEST_FUNCTION(mat_mul), TEST_FUNCTION(nfloat), TEST_FUNCTION(nfloat_complex), }; diff --git a/src/nfloat/test/t-mat_mul.c b/src/nfloat/test/t-mat_mul.c new file mode 100644 index 0000000000..dad6ad625c --- /dev/null +++ b/src/nfloat/test/t-mat_mul.c @@ -0,0 +1,103 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "test_helpers.h" +#include "gr.h" +#include "gr_mat.h" +#include "nfloat.h" + +int +nfloat_mat_mul_block1(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) +{ + return nfloat_mat_mul_block(C, A, B, 1, ctx); +} + +TEST_FUNCTION_START(mat_mul, state) +{ + gr_ctx_t ctx; + slong prec; + slong iter; + gr_ptr tol; + + for (iter = 0; iter < 10 * flint_test_multiplier(); iter++) + { + if (n_randint(state, 5)) + prec = FLINT_BITS * (1 + n_randint(state, 4)); + else + prec = FLINT_BITS * (1 + n_randint(state, NFLOAT_MAX_LIMBS)); + + nfloat_ctx_init(ctx, prec, 0); + + tol = gr_heap_init(ctx); + GR_MUST_SUCCEED(gr_one(tol, ctx)); + GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 2, ctx)); + + gr_mat_test_approx_mul_max_norm( + (gr_method_mat_binary_op) nfloat_mat_mul_waksman, + tol, state, (prec <= 256) ? 10 : 1, 10, ctx); + + gr_mat_test_approx_mul_max_norm( + (gr_method_mat_binary_op) nfloat_mat_mul_block1, + tol, state, (prec <= 256) ? 10 : 1, + (prec <= 256) ? 40 : 20, ctx); + + gr_mat_test_approx_mul_max_norm( + (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical, + tol, state, (prec <= 256) ? 10 : 1, + (prec <= 256) ? 40 : 20, ctx); + + if (n_randint(state, 4) == 0) + gr_mat_test_approx_mul_max_norm( + (gr_method_mat_binary_op) nfloat_mat_mul, + tol, state, 1, 120, ctx); + + gr_heap_clear(tol, ctx); + gr_ctx_clear(ctx); + } + + for (iter = 0; iter < 100 * flint_test_multiplier(); iter++) + { + if (n_randint(state, 5)) + prec = FLINT_BITS * (1 + n_randint(state, 4)); + else + prec = FLINT_BITS * (1 + n_randint(state, NFLOAT_MAX_LIMBS)); + + nfloat_ctx_init(ctx, prec, 0); + + tol = gr_heap_init(ctx); + GR_MUST_SUCCEED(gr_one(tol, ctx)); + GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 6, ctx)); + + gr_mat_test_approx_mul_pos_entrywise_accurate( + (gr_method_mat_binary_op) nfloat_mat_mul_waksman, + tol, state, (prec <= 256) ? 10 : 1, 10, ctx); + + gr_mat_test_approx_mul_pos_entrywise_accurate( + (gr_method_mat_binary_op) nfloat_mat_mul_block1, + tol, state, (prec <= 256) ? 10 : 1, + (prec <= 256) ? 40 : 20, ctx); + + gr_mat_test_approx_mul_pos_entrywise_accurate( + (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical, + tol, state, (prec <= 256) ? 10 : 1, + (prec <= 256) ? 40 : 20, ctx); + + if (n_randint(state, 4) == 0) + gr_mat_test_approx_mul_pos_entrywise_accurate( + (gr_method_mat_binary_op) nfloat_mat_mul, + tol, state, 1, 120, ctx); + + gr_heap_clear(tol, ctx); + gr_ctx_clear(ctx); + } + + TEST_FUNCTION_END(state); +} diff --git a/src/python/flint_ctypes.py b/src/python/flint_ctypes.py index d2641b32e3..f12975d88c 100644 --- a/src/python/flint_ctypes.py +++ b/src/python/flint_ctypes.py @@ -1196,6 +1196,41 @@ def csgn(ctx, x): def arg(ctx, x): return ctx._unary_op(x, libgr.gr_arg, "arg($x)") + def min(ctx, x, y): + """ + >>> QQ.min(QQ(1)/3, QQ(1)/4) + 1/4 + >>> RR.min(3, RR.pi()) + 3.000000000000000 + >>> RR.min(RR("11 +/- 1"), RR("12 +/- 3")) + [1e+1 +/- 2.01] + >>> CC.min(2, 3) + 2.000000000000000 + >>> CC.min(2, CC.i()) + Traceback (most recent call last): + ... + FlintUnableError: failed to compute min(x, y) in {Complex numbers (acb, prec = 53)} for {x = 2.000000000000000}, {y = 1.000000000000000*I} + + """ + return ctx._binary_op(x, y, libgr.gr_min, "min($x, $y)") + + def max(ctx, x, y): + """ + >>> QQ.max(QQ(1)/3, QQ(1)/4) + 1/3 + >>> RR.max(3, RR.pi()) + [3.141592653589793 +/- 5.61e-16] + >>> RR.max(RR("10 +/- 1"), RR("9 +/- 3")) + [1e+1 +/- 2.01] + >>> CC.max(2, 3) + 3.000000000000000 + >>> CC.max(2, CC.i()) + Traceback (most recent call last): + ... + FlintUnableError: failed to compute max(x, y) in {Complex numbers (acb, prec = 53)} for {x = 2.000000000000000}, {y = 1.000000000000000*I} + """ + return ctx._binary_op(x, y, libgr.gr_max, "max($x, $y)") + def inf(ctx): """ Positive infinity (for extended number sets which support it). @@ -5474,6 +5509,60 @@ def __setitem__(self, ij, v): if status & GR_DOMAIN: raise ValueError return x + def norm_max(self): + """ + >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_max() + 9.000000000000000 + """ + element_ring = self.parent()._element_ring + res = element_ring() + status = libgr.gr_mat_norm_max(res._ref, self._ref, element_ring._ref) + if status: + if status & GR_UNABLE: raise NotImplementedError + if status & GR_DOMAIN: raise ValueError + return res + + + def norm_1(self): + """ + >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_1() + 18.00000000000000 + """ + element_ring = self.parent()._element_ring + res = element_ring() + status = libgr.gr_mat_norm_1(res._ref, self._ref, element_ring._ref) + if status: + if status & GR_UNABLE: raise NotImplementedError + if status & GR_DOMAIN: raise ValueError + return res + + def norm_inf(self): + """ + >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_inf() + 24.00000000000000 + """ + element_ring = self.parent()._element_ring + res = element_ring() + status = libgr.gr_mat_norm_inf(res._ref, self._ref, element_ring._ref) + if status: + if status & GR_UNABLE: raise NotImplementedError + if status & GR_DOMAIN: raise ValueError + return res + + def norm_frobenius(self): + """ + >>> Mat(RR)([[1,2,3],[4,5,6],[7,8,9]]).norm_frobenius() + [16.88194301613413 +/- 3.73e-15] + """ + element_ring = self.parent()._element_ring + res = element_ring() + status = libgr.gr_mat_norm_frobenius(res._ref, self._ref, element_ring._ref) + if status: + if status & GR_UNABLE: raise NotImplementedError + if status & GR_DOMAIN: raise ValueError + return res + + def nullspace(self): """ Right kernel (nullspace) of this matrix.