Merge pull request #1766 from flintlib/divexact2

Implement fmpz_poly_divexact
flintlib · Feb 3, 2024 · 3f901aa · 3f901aa
2 parents d792df7 + 4bee365
commit 3f901aa
Show file tree

Hide file tree

Showing 23 changed files with 239 additions and 50 deletions.
diff --git a/doc/source/fmpz_poly.rst b/doc/source/fmpz_poly.rst
@@ -1824,6 +1824,10 @@ Euclidean division
     Computes the quotient ``(Q, len-1)`` of ``(A, len)`` upon
     division by `x - c`.
 
+.. function:: void _fmpz_poly_divexact(fmpz * Q, const fmpz * A, slong lenA, const fmpz * B, slong lenB)
+              void fmpz_poly_divexact(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B)
+
+    Like :func:`fmpz_poly_div`, but assumes that the division is exact.
 
 Division with precomputed inverse
 --------------------------------------------------------------------------------

diff --git a/doc/source/history.rst b/doc/source/history.rst
@@ -93,7 +93,7 @@ Jean Kieffer (JK).
     currently benefit significantly due to wrapper overheads (some Arb benchmarks
     run ~5% faster with this change). (AA, FJ).
   * Faster ``_fmpz_vec_dot`` (FJ).
-  * Faster basecase ``fmpz_poly`` and ``fmpz_mat`` basecase algorithms
+  * Faster ``fmpz_poly`` and ``fmpz_mat`` basecase algorithms
     based on dot products.
   * Optimized ``fmpz_mat_mul_classical`` (FJ).
   * Added ``fmpz_mat_mul_waksman``, speeding up ``fmpz_mat`` multiplication

diff --git a/src/fmpq_poly/xgcd.c b/src/fmpq_poly/xgcd.c
@@ -76,8 +76,8 @@ void _fmpq_poly_xgcd(fmpz *G, fmpz_t denG,
         lenD = lenB - lenG + 1;
         C = _fmpz_vec_init(lenC + lenD);
         D = C + lenC;
-        _fmpz_poly_div(C, primA, lenA, G, lenG, 0);
-        _fmpz_poly_div(D, primB, lenB, G, lenG, 0);
+        _fmpz_poly_divexact(C, primA, lenA, G, lenG);
+        _fmpz_poly_divexact(D, primB, lenB, G, lenG);
     }
     else
     {

diff --git a/src/fmpz_mpoly_factor/bpoly_factor.c b/src/fmpz_mpoly_factor/bpoly_factor.c
@@ -139,7 +139,7 @@ void fmpz_bpoly_make_primitive(fmpz_poly_t g, fmpz_bpoly_t A)
 
     for (i = 0; i < A->length; i++)
     {
-        fmpz_poly_div(q, A->coeffs + i, g);
+        fmpz_poly_divexact(q, A->coeffs + i, g);
         fmpz_poly_swap(A->coeffs + i, q);
     }
 

diff --git a/src/fmpz_mpoly_factor/lcc_kaltofen.c b/src/fmpz_mpoly_factor/lcc_kaltofen.c
@@ -151,8 +151,8 @@ static void _make_bases_coprime(
         fmpz_poly_gcd(g, A->p + i, B->p + j);
         if (fmpz_poly_degree(g) > 0)
         {
-            fmpz_poly_div(A->p + i, A->p + i, g);
-            fmpz_poly_div(B->p + j, B->p + j, g);
+            fmpz_poly_divexact(A->p + i, A->p + i, g);
+            fmpz_poly_divexact(B->p + j, B->p + j, g);
             fmpz_poly_factor_fit_length(A, A->num + 1);
             fmpz_poly_set(A->p + A->num, g);
             A->exp[A->num] = A->exp[i];

diff --git a/src/fmpz_poly.h b/src/fmpz_poly.h
@@ -711,6 +711,9 @@ void _fmpz_poly_divrem_preinv(fmpz * Q, fmpz * A, slong len1,
 void fmpz_poly_divrem_preinv(fmpz_poly_t Q, fmpz_poly_t R,
             const fmpz_poly_t A, const fmpz_poly_t B, const fmpz_poly_t B_inv);
 
+void _fmpz_poly_divexact(fmpz * Q, const fmpz * A, slong lenA, const fmpz * B, slong lenB);
+void fmpz_poly_divexact(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B);
+
 fmpz ** _fmpz_poly_powers_precompute(const fmpz * B, slong len);
 
 void fmpz_poly_powers_precompute(fmpz_poly_powers_precomp_t pinv,

diff --git a/src/fmpz_poly/cos_minpoly.c b/src/fmpz_poly/cos_minpoly.c
@@ -246,7 +246,7 @@ _fmpz_poly_cos_minpoly(fmpz * f, ulong n)
         }
     }
 
-    _fmpz_poly_div(f, P, Plen, Q, Qlen, 0);
+    _fmpz_poly_divexact(f, P, Plen, Q, Qlen);
 
     _fmpz_vec_clear(P, Pdeg + 1);
     _fmpz_vec_clear(Q, Pdeg + 1);

diff --git a/src/fmpz_poly/divexact.c b/src/fmpz_poly/divexact.c
@@ -0,0 +1,108 @@
+/*
+    Copyright (C) 2024 Fredrik Johansson
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "fmpz.h"
+#include "fmpz_vec.h"
+#include "fmpz_poly.h"
+#include "gr.h"
+#include "gr_poly.h"
+
+void
+_fmpz_poly_divexact(fmpz * Q, const fmpz * A, slong lenA,
+                                         const fmpz * B, slong lenB)
+{
+    slong lenQ = lenA - lenB + 1;
+
+    if (lenQ == 1)
+    {
+        fmpz_divexact(Q, A + lenA - 1, B + lenB - 1);
+    }
+    else if (lenB == 1)
+    {
+        if (fmpz_is_pm1(B))
+            _fmpz_vec_scalar_mul_fmpz(Q, A, lenA, B);
+        else
+            _fmpz_vec_scalar_divexact_fmpz(Q, A, lenA, B);
+    }
+    else if (lenQ <= 100 || lenB <= 16)
+    {
+        gr_ctx_t ctx;
+        gr_ctx_init_fmpz(ctx);
+        GR_MUST_SUCCEED(_gr_poly_divexact_basecase_bidirectional(Q, A, lenA, B, lenB, ctx));
+    }
+    else
+    {
+        /* todo: the true cutoffs are sometimes higher, especially with
+           unbalanced operands, possibly because divconquer division needs tuning */
+        slong A_bits, B_bits, B_cutoff, Q_cutoff;
+        gr_ctx_t ctx;
+        gr_ctx_init_fmpz(ctx);
+
+        A_bits = _fmpz_vec_max_bits(A, lenQ);
+        B_bits = _fmpz_vec_max_bits(B, FLINT_MIN(lenB, lenQ));
+        A_bits = FLINT_ABS(A_bits);
+        B_bits = FLINT_ABS(B_bits);
+
+        B_cutoff = (B_bits > 3000) ? 20 : 60;
+        Q_cutoff = (A_bits > 1000) ? 100 : 200;
+
+        if (A_bits >= 100 * B_bits)
+        {
+            Q_cutoff *= 2;
+            B_cutoff *= 2;
+        }
+
+        if (lenQ <= Q_cutoff || lenB <= B_cutoff)
+            GR_MUST_SUCCEED(_gr_poly_divexact_basecase_bidirectional(Q, A, lenA, B, lenB, ctx));
+        else
+            _fmpz_poly_div(Q, A, lenA, B, lenB, 0);
+    }
+
+}
+
+void
+fmpz_poly_divexact(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B)
+{
+    fmpz_poly_t T;
+    slong lenA = A->length;
+    slong lenB = B->length;
+    slong lenQ = lenA - lenB + 1;
+
+    if (lenB == 0)
+    {
+        flint_throw(FLINT_ERROR, "Exception (fmpz_poly_divexact). Division by zero.\n");
+    }
+
+    if (lenA < lenB)
+    {
+        fmpz_poly_zero(Q);
+        return;
+    }
+
+    if (Q == A || Q == B)
+    {
+        fmpz_poly_init2(T, lenQ);
+        _fmpz_poly_divexact(T->coeffs, A->coeffs, lenA, B->coeffs, lenB);
+        _fmpz_poly_set_length(T, lenQ);
+        fmpz_poly_swap(T, Q);
+        fmpz_poly_clear(T);
+    }
+    else
+    {
+        fmpz_poly_fit_length(Q, lenQ);
+        _fmpz_poly_divexact(Q->coeffs, A->coeffs, lenA, B->coeffs, lenB);
+        _fmpz_poly_set_length(Q, lenQ);
+    }
+
+    /* should not be needed, but produce something normalised in case
+       this was called with invalid input */
+    _fmpz_poly_normalise(Q);
+}
diff --git a/src/fmpz_poly/lcm.c b/src/fmpz_poly/lcm.c
@@ -39,7 +39,7 @@ void _fmpz_poly_lcm(fmpz * res, const fmpz * poly1, slong len1,
         slong lenV = len1 + len2 - lenW;
 
         V = _fmpz_vec_init(lenV);
-        _fmpz_poly_div(V, res, len1 + len2 - 1, W, lenW, 0);
+        _fmpz_poly_divexact(V, res, len1 + len2 - 1, W, lenW);
         if (fmpz_sgn(V + (lenV - 1)) > 0)
             _fmpz_vec_set(res, V, lenV);
         else

diff --git a/src/fmpz_poly/remove.c b/src/fmpz_poly/remove.c
@@ -73,7 +73,7 @@ fmpz_poly_remove(fmpz_poly_t res, const fmpz_poly_t poly1,
 
         while (i > 0 && !fmpz_poly_divides(q, poly1, p))
         {
-            fmpz_poly_div(p, p, poly2);
+            fmpz_poly_divexact(p, p, poly2);
             i--;
         }
 

diff --git a/src/fmpz_poly/test/main.c b/src/fmpz_poly/test/main.c
@@ -47,6 +47,7 @@
 #include "t-discriminant.c"
 #include "t-div_basecase.c"
 #include "t-div_divconquer.c"
+#include "t-divexact.c"
 #include "t-divhigh_smodp.c"
 #include "t-divides.c"
 #include "t-divlow_smodp.c"
@@ -224,6 +225,7 @@ test_struct tests[] =
     TEST_FUNCTION(fmpz_poly_discriminant),
     TEST_FUNCTION(fmpz_poly_div_basecase),
     TEST_FUNCTION(fmpz_poly_div_divconquer),
+    TEST_FUNCTION(fmpz_poly_divexact),
     TEST_FUNCTION(fmpz_poly_divhigh_smodp),
     TEST_FUNCTION(fmpz_poly_divides),
     TEST_FUNCTION(fmpz_poly_divlow_smodp),

diff --git a/src/fmpz_poly/test/t-divexact.c b/src/fmpz_poly/test/t-divexact.c
@@ -0,0 +1,72 @@
+/*
+    Copyright (C) 2009 William Hart
+    Copyright (C) 2010 Sebastian Pancratz
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "test_helpers.h"
+#include "fmpz.h"
+#include "fmpz_poly.h"
+#include "ulong_extras.h"
+
+TEST_FUNCTION_START(fmpz_poly_divexact, state)
+{
+    int i, result;
+
+    for (i = 0; i < 1000 * flint_test_multiplier(); i++)
+    {
+        fmpz_poly_t a, b, c, q;
+        int aliasing;
+
+        fmpz_poly_init(a);
+        fmpz_poly_init(b);
+        fmpz_poly_init(c);
+        fmpz_poly_init(q);
+
+        fmpz_poly_randtest(a, state, 1 + n_randint(state, 100), 1 + n_randint(state, 200));
+        fmpz_poly_randtest_not_zero(b, state, 1 + n_randint(state, 100), 1 + n_randint(state, 200));
+        fmpz_poly_mul(c, a, b);
+        aliasing = n_randint(state, 3);
+
+        if (aliasing == 0)
+        {
+            fmpz_poly_divexact(q, c, b);
+        }
+        else if (aliasing == 1)
+        {
+            fmpz_poly_set(q, c);
+            fmpz_poly_divexact(q, q, b);
+        }
+        else
+        {
+            fmpz_poly_set(q, b);
+            fmpz_poly_divexact(q, c, q);
+        }
+
+        result = fmpz_poly_equal(q, a);
+
+        if (!result)
+        {
+            flint_printf("FAIL:\n");
+            fmpz_poly_print(a), flint_printf("\n\n");
+            fmpz_poly_print(b), flint_printf("\n\n");
+            fmpz_poly_print(c), flint_printf("\n\n");
+            fmpz_poly_print(q), flint_printf("\n\n");
+            fflush(stdout);
+            flint_abort();
+        }
+
+        fmpz_poly_clear(a);
+        fmpz_poly_clear(b);
+        fmpz_poly_clear(c);
+        fmpz_poly_clear(q);
+    }
+
+    TEST_FUNCTION_END(state);
+}
diff --git a/src/fmpz_poly_factor/factor_squarefree.c b/src/fmpz_poly_factor/factor_squarefree.c
@@ -47,8 +47,8 @@ void fmpz_poly_factor_squarefree(fmpz_poly_factor_t fac, const fmpz_poly_t F)
             fmpz_poly_init(w);
             fmpz_poly_init(s);
 
-            fmpz_poly_div(v, f, d);
-            fmpz_poly_div(w, t1, d);
+            fmpz_poly_divexact(v, f, d);
+            fmpz_poly_divexact(w, t1, d);
 
             for (i = 1; ; i++)
             {
@@ -63,8 +63,8 @@ void fmpz_poly_factor_squarefree(fmpz_poly_factor_t fac, const fmpz_poly_t F)
                 }
 
                 fmpz_poly_gcd(d, v, s);
-                fmpz_poly_div(v, v, d);
-                fmpz_poly_div(w, s, d);
+                fmpz_poly_divexact(v, v, d);
+                fmpz_poly_divexact(w, s, d);
 
                 if (d->length > 1)
                     fmpz_poly_factor_insert(fac, d, i);

diff --git a/src/fmpz_poly_mat/fflu.c b/src/fmpz_poly_mat/fflu.c
@@ -84,7 +84,7 @@ fmpz_poly_mat_fflu(fmpz_poly_mat_t B, fmpz_poly_t den, slong * perm,
                 fmpz_poly_mul(t, E(j, pivot_col), E(pivot_row, k));
                 fmpz_poly_sub(E(j, k), E(j, k), t);
                 if (pivot_row > 0)
-                    fmpz_poly_div(E(j, k), E(j, k), den);
+                    fmpz_poly_divexact(E(j, k), E(j, k), den);
             }
         }
 

diff --git a/src/fmpz_poly_mat/rref.c b/src/fmpz_poly_mat/rref.c
@@ -70,7 +70,7 @@ fmpz_poly_mat_rref(fmpz_poly_mat_t R, fmpz_poly_t den, const fmpz_poly_mat_t A)
                     fmpz_poly_sub(tmp, tmp, tmp2);
                 }
 
-                fmpz_poly_div(fmpz_poly_mat_entry(R, i, nonpivots[k]),
+                fmpz_poly_divexact(fmpz_poly_mat_entry(R, i, nonpivots[k]),
                     tmp, fmpz_poly_mat_entry(R, i, pivots[i]));
             }
         }

diff --git a/src/fmpz_poly_mat/solve_fflu_precomp.c b/src/fmpz_poly_mat/solve_fflu_precomp.c
@@ -66,7 +66,7 @@ fmpz_poly_mat_solve_fflu_precomp(fmpz_poly_mat_t X,
                 fmpz_poly_mul(T, LU(j, i), XX(i, k));
                 fmpz_poly_sub(XX(j, k), XX(j, k), T);
                 if (i > 0)
-                    fmpz_poly_div(XX(j, k), XX(j, k), LU(i-1, i-1));
+                    fmpz_poly_divexact(XX(j, k), XX(j, k), LU(i-1, i-1));
             }
         }
 
@@ -79,7 +79,7 @@ fmpz_poly_mat_solve_fflu_precomp(fmpz_poly_mat_t X,
                 fmpz_poly_mul(T, XX(j, k), LU(i, j));
                 fmpz_poly_sub(XX(i, k), XX(i, k), T);
             }
-            fmpz_poly_div(XX(i, k), XX(i, k), LU(i, i));
+            fmpz_poly_divexact(XX(i, k), XX(i, k), LU(i, i));
         }
     }
 

diff --git a/src/fmpz_poly_q/add.c b/src/fmpz_poly_q/add.c
@@ -82,8 +82,8 @@ void fmpz_poly_q_add_in_place(fmpz_poly_q_t rop, const fmpz_poly_q_t op)
         fmpz_poly_init(r2);
         fmpz_poly_init(s2);
 
-        fmpz_poly_div(r2, rop->den, d);
-        fmpz_poly_div(s2, op->den, d);
+        fmpz_poly_divexact(r2, rop->den, d);
+        fmpz_poly_divexact(s2, op->den, d);
 
         fmpz_poly_mul(rop->num, rop->num, s2);
         fmpz_poly_mul(s2, op->num, r2);  /* Using s2 as temp */
@@ -102,8 +102,8 @@ void fmpz_poly_q_add_in_place(fmpz_poly_q_t rop, const fmpz_poly_q_t op)
 
             if (!fmpz_poly_is_one(r2))
             {
-                fmpz_poly_div(rop->num, rop->num, r2);
-                fmpz_poly_div(rop->den, rop->den, r2);
+                fmpz_poly_divexact(rop->num, rop->num, r2);
+                fmpz_poly_divexact(rop->den, rop->den, r2);
             }
         }
         fmpz_poly_clear(r2);
@@ -209,8 +209,8 @@ fmpz_poly_q_add(fmpz_poly_q_t rop,
         fmpz_poly_init(r2);
         fmpz_poly_init(s2);
 
-        fmpz_poly_div(r2, op1->den, d);  /* +ve leading coeff */
-        fmpz_poly_div(s2, op2->den, d);  /* +ve leading coeff */
+        fmpz_poly_divexact(r2, op1->den, d);  /* +ve leading coeff */
+        fmpz_poly_divexact(s2, op2->den, d);  /* +ve leading coeff */
 
         fmpz_poly_mul(rop->num, op1->num, s2);
         fmpz_poly_mul(rop->den, op2->num, r2);  /* Using rop->den as temp */
@@ -229,8 +229,8 @@ fmpz_poly_q_add(fmpz_poly_q_t rop,
 
             if (!fmpz_poly_is_one(r2))
             {
-                fmpz_poly_div(rop->num, rop->num, r2);
-                fmpz_poly_div(rop->den, rop->den, r2);
+                fmpz_poly_divexact(rop->num, rop->num, r2);
+                fmpz_poly_divexact(rop->den, rop->den, r2);
             }
         }
 

diff --git a/src/fmpz_poly_q/canonicalise.c b/src/fmpz_poly_q/canonicalise.c
@@ -28,8 +28,8 @@ void fmpz_poly_q_canonicalise(fmpz_poly_q_t rop)
     fmpz_poly_gcd(gcd, rop->num, rop->den);
     if (!fmpz_poly_is_unit(gcd))
     {
-        fmpz_poly_div(rop->num, rop->num, gcd);
-        fmpz_poly_div(rop->den, rop->den, gcd);
+        fmpz_poly_divexact(rop->num, rop->num, gcd);
+        fmpz_poly_divexact(rop->den, rop->den, gcd);
     }
     fmpz_poly_clear(gcd);