From 794400eaa0475d0b1afeb28c826f274933d32c31 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Mon, 29 Apr 2019 17:17:44 +0200
Subject: [PATCH 01/63] DixonRNSSolver base

---
 linbox/solutions/methods.h |   8 +++
 linbox/solutions/solve.h   |   6 +++
 tests/test-solve-full.C    | 101 +++++++++++++++++++------------------
 3 files changed, 66 insertions(+), 49 deletions(-)
diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h
index 56fb077b9..072eba56e 100644
--- a/linbox/solutions/methods.h
+++ b/linbox/solutions/methods.h
@@ -218,6 +218,9 @@ namespace LinBox {
         // @fixme SingularSolutionType::Deterministic fails with Dense Dixon
         SingularSolutionType singularSolutionType = SingularSolutionType::Random;
 
+        // ----- For DixonRNS method.
+        uint32_t primeBaseLength = 16u; //!< Number of primes l to use; lifting is done modulo p = p1p2...pl.
+
         // ----- For random-based systems.
         size_t trialsBeforeFailure = LINBOX_DEFAULT_TRIALS_BEFORE_FAILURE; //!< Maximum number of trials before giving up.
         bool certifyInconsistency = false; //!< Whether the solver should attempt to find a certificate of inconsistency if
@@ -263,6 +266,11 @@ namespace LinBox {
         // (Numerische Mathematik - Dixon 1982)
         DEFINE_METHOD(Dixon, RingCategories::IntegerTag);
 
+        // Method::DixonRNS uses RNS features over Dixon's p-adic lifting.
+        // (A BLAS Based C Library for Exact Linear Algebra on Integer Matrices - Chen, Storjohann ISSAC 2005)
+        // https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf
+        DEFINE_METHOD(DixonRNS, RingCategories::IntegerTag);
+
         // Method::ChineseRemainder uses the chinese remainder algorithm
         // to solve the problem on multiple modular domains,
         // and finally reconstruct the solution.
diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h
index 224215e38..2e91f58ca 100644
--- a/linbox/solutions/solve.h
+++ b/linbox/solutions/solve.h
@@ -84,6 +84,11 @@ namespace LinBox {
      *      |   - SparseMatrix  > `RationalSolver<..., Method::SparseElimination>`
      *      |   - Otherwise     >  Error
      *      - Otherwise > Error
+     * - Method::DixonRNS
+     *      - IntegerTag
+     *      |   - DenseMatrix   > `DixonRNSSolver`
+     *      |   - Otherwise     >  Error
+     *      - Otherwise > Error
      * - Method::Blackbox > Method::Wiedemann
      * - Method::Wiedemann
      *      - ModularTag > `WiedemannSolver`
@@ -337,6 +342,7 @@ namespace LinBox {
 // Integer-based
 #include "./solve/solve-cra.h"
 #include "./solve/solve-dixon.h"
+#include "./solve/solve-dixon-rns.h"
 #include "./solve/solve-numeric-symbolic.h"
 
 // Blackbox
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index d40bfb941..4caf811bf 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -260,31 +260,31 @@ int main(int argc, char** argv)
 
     bool ok = true;
     do {
-        // ----- Rational Auto
-        ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        // @fixme Dixon<Wiedemann> does not compile
-        // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-
-        ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-
-        // ----- Rational CRA
-        // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails
-        ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-
-        ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-
-        // ----- Rational Dixon
-        ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-        // @fixme Dixon<Wiedemann> does not compile
-        // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // // ----- Rational Auto
+        // ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // // @fixme Dixon<Wiedemann> does not compile
+        // // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+
+        // ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+
+        // // ----- Rational CRA
+        // // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails
+        // ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+
+        // ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+
+        // // ----- Rational Dixon
+        // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // // @fixme Dixon<Wiedemann> does not compile
+        // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 
         // ----- Rational SymbolicNumeric
         // @note SymbolicNumeric methods are only implemented on DenseMatrix
@@ -295,30 +295,33 @@ int main(int argc, char** argv)
         // ok = ok && test_sparse_solve(Method::SymbolicNumericNorm(method), ZZ, QQ, m, n, bitSize, vectorBitSize,
         // seed, verbose);
 
-        // ----- Modular Auto
-        ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_blackbox_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);
-
-        // ----- Modular Blackbox
-        ok = ok && test_dense_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_sparse_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_blackbox_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose);
-
-        // ----- Modular DenseElimination
-        ok = ok && test_dense_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_sparse_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_blackbox_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose);
-
-        // ----- Modular SparseElimination
-        ok = ok && test_dense_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_sparse_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_blackbox_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose);
-
-        // ----- Modular Wiedemann
-        ok = ok && test_dense_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_sparse_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose);
-        ok = ok && test_blackbox_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose);
+        // ----- Rational DixonRNS
+        ok = ok && test_dense_solve(Method::DixonRNS(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+
+        // // ----- Modular Auto
+        // ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_blackbox_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);
+
+        // // ----- Modular Blackbox
+        // ok = ok && test_dense_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_blackbox_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose);
+
+        // // ----- Modular DenseElimination
+        // ok = ok && test_dense_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_blackbox_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose);
+
+        // // ----- Modular SparseElimination
+        // ok = ok && test_dense_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_blackbox_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose);
+
+        // // ----- Modular Wiedemann
+        // ok = ok && test_dense_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_sparse_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose);
+        // ok = ok && test_blackbox_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose);
 
         // ----- Modular Lanczos
         // @fixme Dense is segfaulting

From 5ff5d81b20d5b21cdf42f2ade5117670fc03b80e Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 3 May 2019 11:37:51 +0200
Subject: [PATCH 02/63] Dixon RNS base (again)

---
 linbox/algorithms/dixon-rns-solver.h     | 82 ++++++++++++++++++++++++
 linbox/algorithms/dixon-rns-solver.inl   | 44 +++++++++++++
 linbox/solutions/solve/solve-dixon-rns.h | 53 +++++++++++++++
 3 files changed, 179 insertions(+)
 create mode 100644 linbox/algorithms/dixon-rns-solver.h
 create mode 100644 linbox/algorithms/dixon-rns-solver.inl
 create mode 100644 linbox/solutions/solve/solve-dixon-rns.h

diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/dixon-rns-solver.h
new file mode 100644
index 000000000..0d0036a69
--- /dev/null
+++ b/linbox/algorithms/dixon-rns-solver.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright(C) LinBox
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the  GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * ========LICENCE========
+ */
+
+#pragma once
+
+#include <linbox/solutions/methods.h>
+
+namespace LinBox {
+    /**
+     * @fixme Should this just be a different LiftingContainer?
+     *
+     * Chen/Storjohann RNS-based p-adic lifting.
+     * The algorithm solves Ax = b over the integers.
+     *
+     * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf
+     * A BLAS Based C Library for Exact Linear Algebra on Integer Matrices (ISSAC 2009)
+     *
+     *  Dixon algorithm goes this way:
+     *      (i)     Compute B := A^{-1} mod p
+     *              (with p a random number which is hopefully orthogonal to det(A))
+     *      (ii)    Compute (ci) such that A^{-1} b = c0 + c1 p + ... + ci p^i mod p^{i+1}
+     *              Which means:    r = b
+     *                              for i = 0 .. k-1:
+     *                              |   ci = B r mod p
+     *                              |   r = (r - A ci) / p
+     *              (stop when p^k > 2ND given by Hadamard bound)
+     *      (iii)   Rational reconstruct with c = c0 + c1 p + ... + ck p^{k-1} (over the integers)
+     *
+     * The RNS part:
+     *      (i)     Use p = p1p2...pl with an arbitrary l
+     *      (ii)    We can compute the residues for each pj by having ci expressed in an RNS system.
+    *                               r = b
+     *                              for i = 0 .. k-1:
+     *                              |   for j = 0 .. l-1:
+     *                              |   |   ci[j] = B r mod pj
+     *                              |   (Q, R) = such that r = pQ + R with |R| < p
+     *                              |   r = Q + (R - A ci) / p      < Matrix-vector multiplication done in RNS domain
+     *                              |                                 and final addition over ZZ
+     *              /!\ @fixme I do not get it, how can 1 / p be computed in the RNS system, or is it just R - A ci?
+     *              /!\ @fixme The paper does not talk about matrix-matrix multiplication,
+     *              but instead about exploiting RNS.
+     *      (iii)   Having solved the system for each pj, we first RNS-reconstruct the solution mod p
+     *              before rational reconstruction.
+     *
+     * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`.
+     * According to the paper, a value of l = 2 (ln(n) + log2(||A||)) or without the factor 2
+     * can be used, but it depends on the problem, really.
+     */
+    template <class Field, class Ring, class PrimeGenerator>
+    class DixonRNSSolver {
+    public:
+        DixonRNSSolver(const Ring& ring, PrimeGenerator primeGenerator);
+
+        /**
+         * Dense solving.
+         */
+        template <class IntVector, class Vector>
+        void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix<Ring>& A,
+                   const Vector& b, const Method::DixonRNS& m);
+    };
+}
+
+#include "./dixon-rns-solver.inl"
\ No newline at end of file
diff --git a/linbox/algorithms/dixon-rns-solver.inl b/linbox/algorithms/dixon-rns-solver.inl
new file mode 100644
index 000000000..6752598d3
--- /dev/null
+++ b/linbox/algorithms/dixon-rns-solver.inl
@@ -0,0 +1,44 @@
+/*
+ * Copyright(C) LinBox
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the  GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * ========LICENCE========
+ */
+
+#pragma once
+
+#include <linbox/solutions/methods.h>
+
+namespace LinBox {
+    template <class Field, class Ring, class PrimeGenerator>
+    inline DixonRNSSolver<Field, Ring, PrimeGenerator>::DixonRNSSolver(
+        const Ring& ring, PrimeGenerator primeGenerator)
+    {
+    }
+
+    /**
+     * Dense solving.
+     */
+    template <class Field, class Ring, class PrimeGenerator>
+    template <class IntVector, class Vector>
+    inline void DixonRNSSolver<Field, Ring, PrimeGenerator>::solve(
+        IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix<Ring>& A,
+        const Vector& b, const Method::DixonRNS& m)
+    {
+    }
+}
\ No newline at end of file
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
new file mode 100644
index 000000000..1da9dce2a
--- /dev/null
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright(C) LinBox
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the  GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * ========LICENCE========
+ */
+
+#pragma once
+
+#include <linbox/algorithms/dixon-rns-solver.h>
+
+namespace LinBox {
+    /**
+     * \brief Solve specialisation for DixonRNS on dense matrices.
+     */
+    template <class IntVector, class Ring, class Vector>
+    void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
+               const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
+    {
+        commentator().start("solve.dixon.integer.dense");
+
+        using Field = Givaro::ModularBalanced<double>;
+        using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
+        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
+
+        DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
+        solver.solve(xNum, xDen, A, b, m);
+
+        commentator().stop("solve.dixon.integer.dense");
+
+        // @fixme Implement something like that
+        // if (status == SS_INCONSISTENT) {
+        //     throw LinboxMathInconsistentSystem("From DixonRNS method.");
+        // } else if (status == SS_FAILED || status == SS_BAD_PRECONDITIONER) {
+        //     throw LinboxError("From DixonRNS method.");
+        // }
+    }
+}
\ No newline at end of file

From 92ab216c2db13c297fa66dcffce3e776c3743f5e Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 3 May 2019 16:01:22 +0200
Subject: [PATCH 03/63] More doc before implem

---
 linbox/algorithms/dixon-rns-solver.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/dixon-rns-solver.h
index 0d0036a69..161a2d088 100644
--- a/linbox/algorithms/dixon-rns-solver.h
+++ b/linbox/algorithms/dixon-rns-solver.h
@@ -46,23 +46,27 @@ namespace LinBox {
      *      (iii)   Rational reconstruct with c = c0 + c1 p + ... + ck p^{k-1} (over the integers)
      *
      * The RNS part:
-     *      (i)     Use p = p1p2...pl with an arbitrary l
-     *      (ii)    We can compute the residues for each pj by having ci expressed in an RNS system.
+     *      (i)     Use p = p0p1...p{lp-1} with an arbitrary lp and (q0, q1, ..., q{lq-1}) also primes.
+     *      (ii)    We now do our computation in a RNS system (p0, ..., p{lp-1}, q0, ..., q{lq-1}):
     *                               r = b
      *                              for i = 0 .. k-1:
-     *                              |   for j = 0 .. l-1:
-     *                              |   |   ci[j] = B r mod pj
+     *                              |   for j = 0 .. lq-1:
+     *                              |   |   ci[qj] = Bj r mod qj
      *                              |   (Q, R) = such that r = pQ + R with |R| < p
      *                              |   r = Q + (R - A ci) / p      < Matrix-vector multiplication done in RNS domain
      *                              |                                 and final addition over ZZ
-     *              /!\ @fixme I do not get it, how can 1 / p be computed in the RNS system, or is it just R - A ci?
+     *              @note (R - A ci) / p can be computed in a RNS system.
+     *              We know that (R - A ci) is divisible by p,
+     *              so its representation is 0 on all lp first terms of the representation, meaning
+     *              we just need representation of ci mod (q0, ..., q{lq-1}).
+     *              For the division part, we just have to multiply the RNS representation of (R - A ci) by
+     *              (1/p) mod (q0, ..., q{lq-1}).
      *              /!\ @fixme The paper does not talk about matrix-matrix multiplication,
      *              but instead about exploiting RNS.
-     *      (iii)   Having solved the system for each pj, we first RNS-reconstruct the solution mod p
-     *              before rational reconstruction.
+     *      (iii)   We first RNS-reconstruct the solution before rational reconstruction.
      *
      * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`.
-     * According to the paper, a value of l = 2 (ln(n) + log2(||A||)) or without the factor 2
+     * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2
      * can be used, but it depends on the problem, really.
      */
     template <class Field, class Ring, class PrimeGenerator>

From 264cdc89b24fd46b9c5288bf862e72c0dd2341ad Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Mon, 6 May 2019 15:51:31 +0200
Subject: [PATCH 04/63] Updated dixon RNS solver algorithm description

---
 linbox/algorithms/dixon-rns-solver.h | 56 ++++++++++++----------------
 1 file changed, 23 insertions(+), 33 deletions(-)

diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/dixon-rns-solver.h
index 161a2d088..60264e455 100644
--- a/linbox/algorithms/dixon-rns-solver.h
+++ b/linbox/algorithms/dixon-rns-solver.h
@@ -26,44 +26,34 @@
 
 namespace LinBox {
     /**
-     * @fixme Should this just be a different LiftingContainer?
+     * @fixme This should just be a different LiftingContainer!
      *
-     * Chen/Storjohann RNS-based p-adic lifting.
      * The algorithm solves Ax = b over the integers.
-     *
+     * It is based on Chen/Storjohann RNS-based p-adic lifting.
      * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf
      * A BLAS Based C Library for Exact Linear Algebra on Integer Matrices (ISSAC 2009)
+     * But it has been slightly modified in order to use BLAS3 multiplication within the main loop.
      *
-     *  Dixon algorithm goes this way:
-     *      (i)     Compute B := A^{-1} mod p
-     *              (with p a random number which is hopefully orthogonal to det(A))
-     *      (ii)    Compute (ci) such that A^{-1} b = c0 + c1 p + ... + ci p^i mod p^{i+1}
-     *              Which means:    r = b
-     *                              for i = 0 .. k-1:
-     *                              |   ci = B r mod p
-     *                              |   r = (r - A ci) / p
-     *              (stop when p^k > 2ND given by Hadamard bound)
-     *      (iii)   Rational reconstruct with c = c0 + c1 p + ... + ck p^{k-1} (over the integers)
-     *
-     * The RNS part:
-     *      (i)     Use p = p0p1...p{lp-1} with an arbitrary lp and (q0, q1, ..., q{lq-1}) also primes.
-     *      (ii)    We now do our computation in a RNS system (p0, ..., p{lp-1}, q0, ..., q{lq-1}):
-    *                               r = b
-     *                              for i = 0 .. k-1:
-     *                              |   for j = 0 .. lq-1:
-     *                              |   |   ci[qj] = Bj r mod qj
-     *                              |   (Q, R) = such that r = pQ + R with |R| < p
-     *                              |   r = Q + (R - A ci) / p      < Matrix-vector multiplication done in RNS domain
-     *                              |                                 and final addition over ZZ
-     *              @note (R - A ci) / p can be computed in a RNS system.
-     *              We know that (R - A ci) is divisible by p,
-     *              so its representation is 0 on all lp first terms of the representation, meaning
-     *              we just need representation of ci mod (q0, ..., q{lq-1}).
-     *              For the division part, we just have to multiply the RNS representation of (R - A ci) by
-     *              (1/p) mod (q0, ..., q{lq-1}).
-     *              /!\ @fixme The paper does not talk about matrix-matrix multiplication,
-     *              but instead about exploiting RNS.
-     *      (iii)   We first RNS-reconstruct the solution before rational reconstruction.
+     *  RNS Dixon algorithm goes this way:
+     *      (i)     Use (p1, ..., pl) primes with an arbitrary l.
+     *      (ii)    Algorithm goes:
+     *                  for i = 1 .. l:
+     *                  |   Bi = A^{-1} mod pi                      < Pre-computing
+     *                  [r1|...|rl] = [b|...|b]
+     *                  [y1|...|yl] = [0|...|0]
+     *                  for j = 1 .. k:
+     *                  |   for i = 1 .. l:
+     *                  |   |   ci = Bi ri mod pi                   < Matrix-vector in Z/pZ
+     *                  |   |   yi = (yi * pi) + ci                 < Done over ZZ
+     *                  |   |   (Qi, Ri) = such that r = pi Qi + Ri with |Ri| < pi
+     *                  |   V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
+     *                  |   for i = 1 .. l:
+     *                  |   |   ri = Qi + (Vi / pi)
+     *              @note The computation of V can be done in a RNS system such that each RNS base-prime
+     *              is bigger than each (p1, ..., pl). This way, [R1|...|Rl] and [c1|...|cl] are zero-cost
+     *              to get in the RNS system.
+     *      (iii)   y = CRT_Reconstruct(y1, ..., yl)
+     *      (iv)    x = Rational_Reconstruct(y)
      *
      * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`.
      * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2

From a7155bf6b758ca52f180dc2b29614dd949145d75 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 14 May 2019 15:55:39 +0200
Subject: [PATCH 05/63] Base for MultiModLiftingContainer

---
 linbox/algorithms/dixon-rns-solver.inl        | 44 -----------------
 ...solver.h => multi-mod-lifting-container.h} | 48 ++++++++++++-------
 linbox/solutions/solve/solve-dixon-rns.h      | 44 +++++++++++++++--
 3 files changed, 70 insertions(+), 66 deletions(-)
 delete mode 100644 linbox/algorithms/dixon-rns-solver.inl
 rename linbox/algorithms/{dixon-rns-solver.h => multi-mod-lifting-container.h} (69%)

diff --git a/linbox/algorithms/dixon-rns-solver.inl b/linbox/algorithms/dixon-rns-solver.inl
deleted file mode 100644
index 6752598d3..000000000
--- a/linbox/algorithms/dixon-rns-solver.inl
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright(C) LinBox
- *
- * ========LICENCE========
- * This file is part of the library LinBox.
- *
- * LinBox is free software: you can redistribute it and/or modify
- * it under the terms of the  GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * ========LICENCE========
- */
-
-#pragma once
-
-#include <linbox/solutions/methods.h>
-
-namespace LinBox {
-    template <class Field, class Ring, class PrimeGenerator>
-    inline DixonRNSSolver<Field, Ring, PrimeGenerator>::DixonRNSSolver(
-        const Ring& ring, PrimeGenerator primeGenerator)
-    {
-    }
-
-    /**
-     * Dense solving.
-     */
-    template <class Field, class Ring, class PrimeGenerator>
-    template <class IntVector, class Vector>
-    inline void DixonRNSSolver<Field, Ring, PrimeGenerator>::solve(
-        IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix<Ring>& A,
-        const Vector& b, const Method::DixonRNS& m)
-    {
-    }
-}
\ No newline at end of file
diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/multi-mod-lifting-container.h
similarity index 69%
rename from linbox/algorithms/dixon-rns-solver.h
rename to linbox/algorithms/multi-mod-lifting-container.h
index 60264e455..7072eb866 100644
--- a/linbox/algorithms/dixon-rns-solver.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -26,8 +26,6 @@
 
 namespace LinBox {
     /**
-     * @fixme This should just be a different LiftingContainer!
-     *
      * The algorithm solves Ax = b over the integers.
      * It is based on Chen/Storjohann RNS-based p-adic lifting.
      * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf
@@ -43,15 +41,15 @@ namespace LinBox {
      *                  [y1|...|yl] = [0|...|0]
      *                  for j = 1 .. k:
      *                  |   for i = 1 .. l:
-     *                  |   |   ci = Bi ri mod pi                   < Matrix-vector in Z/pZ
-     *                  |   |   yi = (yi * pi) + ci                 < Done over ZZ
      *                  |   |   (Qi, Ri) = such that r = pi Qi + Ri with |Ri| < pi
+     *                  |   |   ci = Bi ri mod pi                   < Matrix-vector in Z/pZ
+     *                  |   |   yi = yi + ci * pi^(j-1)             < Done over ZZ
      *                  |   V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
      *                  |   for i = 1 .. l:
      *                  |   |   ri = Qi + (Vi / pi)
-     *              @note The computation of V can be done in a RNS system such that each RNS base-prime
-     *              is bigger than each (p1, ..., pl). This way, [R1|...|Rl] and [c1|...|cl] are zero-cost
-     *              to get in the RNS system.
+     *              @note The computation of V can be done in an RNS system such that each RNS
+     *              base-prime is bigger than each of (p1, ..., pl). This way, [R1|...|Rl] and
+     *              [c1|...|cl] are zero-cost to get in the RNS system.
      *      (iii)   y = CRT_Reconstruct(y1, ..., yl)
      *      (iv)    x = Rational_Reconstruct(y)
      *
@@ -59,18 +57,32 @@ namespace LinBox {
      * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2
      * can be used, but it depends on the problem, really.
      */
-    template <class Field, class Ring, class PrimeGenerator>
-    class DixonRNSSolver {
+    template <class _Field, class _Ring, class _PrimeGenerator>
+    class MultiModLiftingContainer final : public LiftingContainerBase<_Ring, DenseMatrix<_Ring>> {
+        using BaseClass = LiftingContainerBase<_Ring, DenseMatrix<_Ring>>;
+
+    public:
+        using typename BaseClass::Ring;
+        using typename BaseClass::IMatrix;
+        using typename BaseClass::IVector;
+
+        using Field = _Field;
+        using PrimeGenerator = _PrimeGenerator;
+
     public:
-        DixonRNSSolver(const Ring& ring, PrimeGenerator primeGenerator);
+        // @fixme
+        const std::vector<Integer> primes = {97, 101};
+
+        // @fixme Split to inline file
+        MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator,
+                                 const IMatrix& A, const IVector& b,
+                                 const Method::DixonRNS& m)
+            : BaseClass(ring, A, b, 97 * 101)
+        {
+        }
 
-        /**
-         * Dense solving.
-         */
-        template <class IntVector, class Vector>
-        void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix<Ring>& A,
-                   const Vector& b, const Method::DixonRNS& m);
+        IVector& nextdigit (IVector& , const IVector&) const final {
+
+        }
     };
 }
-
-#include "./dixon-rns-solver.inl"
\ No newline at end of file
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 1da9dce2a..c62e1252c 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -22,15 +22,51 @@
 
 #pragma once
 
-#include <linbox/algorithms/dixon-rns-solver.h>
+#include <linbox/algorithms/multi-mod-lifting-container.h>
 
 namespace LinBox {
+    // @fixme Move that to a file - and make it be a RationalSolver<Method::DixonRNS>
+    template <class Field, class Ring, class PrimeGenerator>
+    class DixonRNSSolver {
+    public:
+        DixonRNSSolver(const Ring& ring, PrimeGenerator& primeGenerator)
+            : _ring(ring)
+            , _primeGenerator(primeGenerator)
+        {
+            /* @todo */
+        }
+
+        /**
+         * Dense solving.
+         */
+        template <class RVector, class Vector>
+        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
+                   const Vector& b, const Method::DixonRNS& m)
+        {
+            // @fixme We should use some code from DixonSolver...
+            // But that's hard so we just assume that A is square and invertible.
+            linbox_check(A.rowdim() == A.coldim());
+
+            using LiftingContainer = MultiModLiftingContainer<Field, Ring, PrimeGenerator>;
+            LiftingContainer lc(_ring, _primeGenerator, A, b, m);
+            RationalReconstruction<LiftingContainer> re(lc);
+
+            if (!re.getRational(xNum, xDen, 0)) {
+                std::cerr << "OUCH!" << std::endl;
+            }
+        }
+
+    private:
+        const Ring& _ring;
+        PrimeGenerator& _primeGenerator;
+    };
+
     /**
      * \brief Solve specialisation for DixonRNS on dense matrices.
      */
-    template <class IntVector, class Ring, class Vector>
-    void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
-               const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
+    template <class RVector, class Ring, class Vector>
+    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
+               const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
     {
         commentator().start("solve.dixon.integer.dense");
 

From 8e85814a6d9f0e427595757af1f50971ae83279d Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Fri, 17 May 2019 07:59:33 +0200
Subject: [PATCH 06/63] More on lifting - r fill up

---
 .../algorithms/multi-mod-lifting-container.h  | 45 +++++++++++++++----
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 7072eb866..7ba60aa05 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -41,7 +41,7 @@ namespace LinBox {
      *                  [y1|...|yl] = [0|...|0]
      *                  for j = 1 .. k:
      *                  |   for i = 1 .. l:
-     *                  |   |   (Qi, Ri) = such that r = pi Qi + Ri with |Ri| < pi
+     *                  |   |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
      *                  |   |   ci = Bi ri mod pi                   < Matrix-vector in Z/pZ
      *                  |   |   yi = yi + ci * pi^(i-1)             < Done over ZZ
      *                  |   V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
@@ -62,27 +62,56 @@ namespace LinBox {
         using BaseClass = LiftingContainerBase<_Ring, DenseMatrix<_Ring>>;
 
     public:
-        using typename BaseClass::Ring;
         using typename BaseClass::IMatrix;
         using typename BaseClass::IVector;
+        using typename BaseClass::Ring;
 
         using Field = _Field;
         using PrimeGenerator = _PrimeGenerator;
 
     public:
-        // @fixme
-        const std::vector<Integer> primes = {97, 101};
+        // @fixme Have dynamic random ones
+        const std::vector<Integer> p = {97, 101};
 
         // @fixme Split to inline file
-        MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator,
-                                 const IMatrix& A, const IVector& b,
+        MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, const IVector& b,
                                  const Method::DixonRNS& m)
+            // @fixme Am forces to set the prime here? Why?
             : BaseClass(ring, A, b, 97 * 101)
+            , _ring(ring)
         {
-        }
+            // @note From baseClass, we have _length = log2(2 * N * D)
+
+            // @fixme Have l = log(||A||) + log(n) or so
+            uint32_t l = p.size();
+
+            // Ap[0] = A mod p[0]
+            // Ap[1] = A mod p[1]
 
-        IVector& nextdigit (IVector& , const IVector&) const final {
+            // B[0] = inv(Ap[0]) mod p[0] @fixme How?
+            // B[1] = inv(Ap[1]) mod p[1]
 
+            // @note As _r is row major, we store each ri on each row.
+            // So that r[i] = current residue for p[i].
+            _r = std::make_unique<DenseMatrix<Ring>>(_ring, l, b.size());
+            for (auto i = 0u; i < l; ++i) {
+                // @fixme Is there a vector domain to copy to a matrix?
+                for (auto j = 0u; j < b.size(); ++j) {
+                    _ring.assign(_r[i][j], b[j]);
+                }
+            }
         }
+
+        IVector& nextdigit(IVector&, const IVector&) const final
+        {
+            // @fixme With this design, are we forces to CRT_Reconstruct each ci?
+            // Is this bad?
+        }
+
+    private:
+        Ring& _ring;
+
+        // @note r is a big matrix in ZZ holding all residues
+        std::unique_ptr<DenseMatrix<Ring>> _r;
     };
 }

From 6b6deb40319151f040a7cfc10f0987117e3c0445 Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Fri, 17 May 2019 08:34:29 +0200
Subject: [PATCH 07/63] Multi mod lifting incoming!

---
 .../algorithms/multi-mod-lifting-container.h  | 58 +++++++++++++++++--
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 7ba60aa05..8c5883936 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -85,33 +85,81 @@ namespace LinBox {
             // @fixme Have l = log(||A||) + log(n) or so
             uint32_t l = p.size();
 
+            // @fixme Initialize fields _F[i]
+
             // Ap[0] = A mod p[0]
             // Ap[1] = A mod p[1]
 
-            // B[0] = inv(Ap[0]) mod p[0] @fixme How?
+            // B[0] = inv(Ap[0]) mod p[0]
             // B[1] = inv(Ap[1]) mod p[1]
+            // @fixme How?
 
             // @note As _r is row major, we store each ri on each row.
             // So that r[i] = current residue for p[i].
-            _r = std::make_unique<DenseMatrix<Ring>>(_ring, l, b.size());
+            _r.init(_ring, l, b.size());
             for (auto i = 0u; i < l; ++i) {
                 // @fixme Is there a vector domain to copy to a matrix?
                 for (auto j = 0u; j < b.size(); ++j) {
                     _ring.assign(_r[i][j], b[j]);
                 }
             }
+
+            // @fixme Allocate Q and R
+            // @fixme Allocate c
+
+            // @todo Set up an RNS system
         }
 
-        IVector& nextdigit(IVector&, const IVector&) const final
+        IVector& nextdigit(IVector& digit, const IVector& residu) const final
         {
-            // @fixme With this design, are we forces to CRT_Reconstruct each ci?
+            // @fixme The residu can't be r, here!
+            // So the overall does a lot more job than it needs.
+            // See below for the solution.
+
+            // @fixme With this design, are we forced to CRT_Reconstruct each ci?
             // Is this bad?
+            // If we don't want that, we need to not extent LiftingContainerBase,
+            // and reimplement some of the behavior.
+            // Because the only thing needed to user API (rational reconstruction)
+            // is bool next (IVector& digit) from iterator.
+
+            /*  for i = 1 .. l:
+             *  |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
+             *  |   ci = Bi Ri mod pi                   < Matrix-vector in Z/pZ
+             *  V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
+             *  for i = 1 .. l:
+             *  |   ri = Qi + (Vi / pi)
+             */
+
+            // @fixme Could be parallel!
+            for (auto i = 0u; i < l; ++i) {
+                Hom<Ring, Field> hom(_ring, _F[i]);
+
+                // @fixme How to do euclidian division?
+                // ri = pi Qi + Ri
+
+                // @todo If R might already be a field element
+                _B[i]->apply(_c[i], hom.convert(_R[i]));
+
+                // @todo Convert _c[i] to RNS
+            }
+
+            // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields?
+
+            // @fixme Compute the next residue!
+
+            return digit;
         }
 
     private:
         Ring& _ring;
 
         // @note r is a big matrix in ZZ holding all residues
-        std::unique_ptr<DenseMatrix<Ring>> _r;
+        IMatrix _r;
+        FMatrix _c;
+        std::vector<FMatrix> _B; // Inverses of A mod p[i]
+        std::vector<IVector> _Q;
+        std::vector<FVector> _R;
+        std::vector<Field> _F;
     };
 }

From d56184ecc00405437cd00ab632a58251215d7476 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 22 May 2019 14:21:12 +0200
Subject: [PATCH 08/63] Not using LiftingContainerBase anymore

---
 linbox/algorithms/lifting-container.h         |  28 ---
 .../algorithms/multi-mod-lifting-container.h  | 170 ++++++++++++++----
 2 files changed, 138 insertions(+), 60 deletions(-)

diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h
index 6a9180092..7735c9f89 100644
--- a/linbox/algorithms/lifting-container.h
+++ b/linbox/algorithms/lifting-container.h
@@ -283,34 +283,6 @@ namespace LinBox
 
 		};
 
-		/*- @brief Bit manipulation function for possible use in optimization.
-		 * efficiently pulls out continuous blocks of bits, from lsb to msb inclusive
-		 * least significant bits start at index 0, so msb >= lsb
-		 * if any bits with index >= 8*numBytes are asked for they will be zeroes
-		 */
-#if 0
-		static long long bytesToBits(unsigned char * byteArray, size_t numBytes, size_t lsb, size_t msb) {
-			linbox_check(msb >= lsb);
-			size_t lsbi = lsb >> 3;
-			size_t msbi = msb >> 3;
-			if (msbi == lsbi)
-				if (msbi >= numBytes)
-					return 0;
-				else
-					return (byteArray[lsbi] >> (lsb & 7)) & ((1 << (msb - lsb + 1)) - 1);
-
-			long long result = (msbi < numBytes) ? (byteArray[msbi] & ((1 << ((msb & 7)+1)) - 1)) : 0;
-			for (size_t i=msbi-1; i>lsbi; i--) {
-				result <<= 8;
-				result |= (i < numBytes) ? byteArray[i] : 0;
-			}
-			result <<= 8 - (lsb & 7);
-			result |= (lsbi < numBytes) ? (byteArray[lsbi] >> (lsb & 7)) : 0;
-
-			return result;
-		}
-#endif
-
 		const_iterator begin() const
 		{
 			return const_iterator(*this);
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 8c5883936..21ececa27 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -26,7 +26,10 @@
 
 namespace LinBox {
     /**
-     * The algorithm solves Ax = b over the integers.
+     * The algorithm finds the p-adic expansion of A^{-1} * b.
+     * So that A^{-1} * b = c0 + c1 * p + c2 * p^2 + ... + c{k-1} * p^{k-1}.
+     * The chosen p is multi-modular.
+     *
      * It is based on Chen/Storjohann RNS-based p-adic lifting.
      * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf
      * A BLAS Based C Library for Exact Linear Algebra on Integer Matrices (ISSAC 2009)
@@ -58,32 +61,36 @@ namespace LinBox {
      * can be used, but it depends on the problem, really.
      */
     template <class _Field, class _Ring, class _PrimeGenerator>
-    class MultiModLiftingContainer final : public LiftingContainerBase<_Ring, DenseMatrix<_Ring>> {
-        using BaseClass = LiftingContainerBase<_Ring, DenseMatrix<_Ring>>;
+    class MultiModLiftingContainer final : public LiftingContainer<_Ring> {
+        using BaseClass = LiftingContainer<_Ring>;
 
     public:
-        using typename BaseClass::IMatrix;
-        using typename BaseClass::IVector;
-        using typename BaseClass::Ring;
-
+        using Ring = _Ring;
         using Field = _Field;
         using PrimeGenerator = _PrimeGenerator;
 
+        using IElement = typename _Ring::Element;
+        using IMatrix = DenseMatrix<_Ring>;
+        using IVector = DenseVector<_Ring>;
+        using FMatrix = DenseMatrix<_Field>;
+        using FVector = DenseVector<_Field>;
+
     public:
-        // @fixme Have dynamic random ones
-        const std::vector<Integer> p = {97, 101};
+        // -------------------
+        // ----- Main behavior
 
         // @fixme Split to inline file
-        MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, const IVector& b,
-                                 const Method::DixonRNS& m)
-            // @fixme Am forces to set the prime here? Why?
-            : BaseClass(ring, A, b, 97 * 101)
-            , _ring(ring)
+        MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A,
+                                 const IVector& b, const Method::DixonRNS& m)
+            : _ring(ring)
+            , _r(_ring)
+            , _c(_field)
         {
+            // @fixme Compute hadamard and such
+
             // @note From baseClass, we have _length = log2(2 * N * D)
 
             // @fixme Have l = log(||A||) + log(n) or so
-            uint32_t l = p.size();
 
             // @fixme Initialize fields _F[i]
 
@@ -96,13 +103,13 @@ namespace LinBox {
 
             // @note As _r is row major, we store each ri on each row.
             // So that r[i] = current residue for p[i].
-            _r.init(_ring, l, b.size());
-            for (auto i = 0u; i < l; ++i) {
-                // @fixme Is there a vector domain to copy to a matrix?
-                for (auto j = 0u; j < b.size(); ++j) {
-                    _ring.assign(_r[i][j], b[j]);
-                }
-            }
+            // _r.init(_ring, l, b.size());
+            // for (auto i = 0u; i < l; ++i) {
+            //     // @fixme Is there a vector domain to copy to a matrix?
+            //     for (auto j = 0u; j < b.size(); ++j) {
+            //         _ring.assign(_r[i][j], b[j]);
+            //     }
+            // }
 
             // @fixme Allocate Q and R
             // @fixme Allocate c
@@ -110,7 +117,8 @@ namespace LinBox {
             // @todo Set up an RNS system
         }
 
-        IVector& nextdigit(IVector& digit, const IVector& residu) const final
+        // @fixme USELESS?
+        IVector& nextdigit(IVector& digit, const IVector& residu) const
         {
             // @fixme The residu can't be r, here!
             // So the overall does a lot more job than it needs.
@@ -132,17 +140,17 @@ namespace LinBox {
              */
 
             // @fixme Could be parallel!
-            for (auto i = 0u; i < l; ++i) {
-                Hom<Ring, Field> hom(_ring, _F[i]);
+            // for (auto i = 0u; i < l; ++i) {
+            //     Hom<Ring, Field> hom(_ring, _F[i]);
 
-                // @fixme How to do euclidian division?
-                // ri = pi Qi + Ri
+            //     // @fixme How to do euclidian division?
+            //     // ri = pi Qi + Ri
 
-                // @todo If R might already be a field element
-                _B[i]->apply(_c[i], hom.convert(_R[i]));
+            //     // @todo If R might already be a field element
+            //     _B[i]->apply(_c[i], hom.convert(_R[i]));
 
-                // @todo Convert _c[i] to RNS
-            }
+            //     // @todo Convert _c[i] to RNS
+            // }
 
             // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields?
 
@@ -151,8 +159,106 @@ namespace LinBox {
             return digit;
         }
 
+        // --------------------------
+        // ----- LiftingContainer API
+
+        const Ring& ring() const final { return _ring; }
+
+        /// The length of the container.
+        size_t length() const final { return _k; }
+
+        /// The dimension of the problem/solution.
+        size_t size() const final { return _n; }
+
+        /**
+         * We are compliant to the interface even though
+         * p is multi-modular and thus not a prime.
+         */
+        const IElement& prime() const final { return _p; }
+
+        // ------------------------------
+        // ----- NOT LiftingContainer API
+        // ----- but still needed
+
+		const IElement numbound() const
+		{
+			return _numbound;
+		}
+
+		const IElement denbound() const
+		{
+			return _denbound;
+		}
+
+        // --------------
+        // ----- Iterator
+
+        /**
+         * Needed API for rational reconstruction.
+         * Each call to next() will update
+         */
+        class const_iterator {
+        private:
+            BlasVector<Ring> _res;
+            const MultiModLiftingContainer& _lc;
+            size_t _position;
+
+        public:
+            const_iterator(const MultiModLiftingContainer& lc, size_t end = 0)
+                : _lc(lc)
+                , _position(end)
+            {
+                // @fixme Initialize _residue
+            }
+
+            /**
+             * Returns false if the next digit cannot be computed (bad modulus).
+             */
+            bool next(IVector& digit)
+            {
+                // compute v2 = _matA * digit
+                IVector v2(_lc.ring(), _lc.size());
+                // @fixme _lc._MAD.applyV(v2, digit, _res);
+
+                // update _res -= v2
+                // @fixme _lc._VDR.subin(_res, v2);
+                typename BlasVector<Ring>::iterator p0;
+
+                // update _res = _res / p
+                int index = 0;
+                for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) {
+                    _lc.ring().divin(*p0, _lc._p);
+                }
+
+                // increase position of the iterator
+                ++_position;
+                return true;
+            }
+
+            bool operator!=(const const_iterator& iterator) const
+            {
+                return _position != iterator._position;
+            }
+
+            bool operator==(const const_iterator& iterator) const
+            {
+                return _position == iterator._position;
+            }
+        };
+
+        const_iterator begin() const { return const_iterator(*this); }
+        const_iterator end() const { return const_iterator(*this, _k); }
+
     private:
-        Ring& _ring;
+        const Ring& _ring;
+        Field _field;
+
+        IElement _numbound;
+        IElement _denbound;
+
+        IElement _p;
+        size_t _k; //< Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound)
+        size_t _n; //< Row/column dimension of A.
 
         // @note r is a big matrix in ZZ holding all residues
         IMatrix _r;

From 5bb101fee136bd264fb75f4232bad3285aa0a327 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 22 May 2019 15:29:09 +0200
Subject: [PATCH 09/63] Initializing up to inverse of A mod pi

---
 .../algorithms/multi-mod-lifting-container.h  | 212 ++++++++++--------
 1 file changed, 121 insertions(+), 91 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 21ececa27..a0c984179 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -69,9 +69,10 @@ namespace LinBox {
         using Field = _Field;
         using PrimeGenerator = _PrimeGenerator;
 
-        using IElement = typename _Ring::Element;
+        using IElement = typename Ring::Element;
         using IMatrix = DenseMatrix<_Ring>;
         using IVector = DenseVector<_Ring>;
+        using FElement = typename Field::Element;
         using FMatrix = DenseMatrix<_Field>;
         using FVector = DenseVector<_Field>;
 
@@ -83,23 +84,70 @@ namespace LinBox {
         MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A,
                                  const IVector& b, const Method::DixonRNS& m)
             : _ring(ring)
-            , _r(_ring)
-            , _c(_field)
+            , _n(A.rowdim())
         {
-            // @fixme Compute hadamard and such
+            linbox_check(A.rowdim() == A.coldim());
 
-            // @note From baseClass, we have _length = log2(2 * N * D)
+            A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
+            std::cout << "b: " << b << std::endl;
 
             // @fixme Have l = log(||A||) + log(n) or so
+            _l = 2;
+            std::cout << "l: " << _l << std::endl;
+
+            // Generating primes
+            IElement iTmp;
+            _ring.assign(_p, _ring.one);
+            for (auto i = 0u; i < _l; ++i) {
+                // @fixme Ensure that all primes are different
+                // @fixme Take into account bestBitSize!
+                _primes.emplace_back(*primeGenerator);
+                _fields.emplace_back(_primes.back());
+                _ring.init(iTmp, _primes.back());
+                _ring.mulin(_p, iTmp);
+
+                std::cout << "primes[" << i << "]: " << Integer(_primes.back()) << std::endl;
+
+                ++primeGenerator;
+            }
 
-            // @fixme Initialize fields _F[i]
-
-            // Ap[0] = A mod p[0]
-            // Ap[1] = A mod p[1]
-
-            // B[0] = inv(Ap[0]) mod p[0]
-            // B[1] = inv(Ap[1]) mod p[1]
-            // @fixme How?
+            std::cout << "p: " << _p << std::endl;
+
+            // Compute how many iterations are needed
+            auto hb = RationalSolveHadamardBound(A, b);
+            double pLog = Givaro::logtwo(_p);
+            _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound)
+                           / pLog); // log2(2 * N * D) / log2(p)
+            std::cout << "k: " << _k << std::endl;
+
+            // @fixme Fact is RationalReconstruction which needs numbound and denbound
+            // expects them to be in non-log...
+            _ring.init(_numbound, Integer(1) << static_cast<uint64_t>(std::ceil(hb.numLogBound)));
+            _ring.init(_denbound, Integer(1) << static_cast<uint64_t>(std::ceil(hb.denLogBound)));
+
+            // Initialize all inverses
+            // @fixme Somehow, the inverse mod p within DixonSolver<Dense> was already computed,
+            // and pass through to the lifting container. Here, we can't do that, because p is
+            // bigger than what DixonSolver<Dense> thought about it. So there might be a lot of
+            // computation done there that is completely useless when using this container. Meaning
+            // that we need a RNSDixonSolver<Dense>.
+            {
+                for (const auto& F : _fields) {
+                    BlasMatrixDomain<Field> bmd(F);
+                    auto Bpi = std::make_unique<FMatrix>(F, _n, _n);
+
+                    // @fixme Taken from rational-solver.inl. BETTER USE REBIND!!!
+                    for (size_t i = 0; i < _n; ++i) {
+                        for (size_t j = 0; j < _n; ++j) {
+                            F.init(Bpi->refEntry(i, j), A.getEntry(i, j));
+                        }
+                    }
+
+                    bmd.invin(*Bpi);
+                    Bpi->write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl;
+                    _B.emplace_back(std::move(Bpi));
+                }
+            }
 
             // @note As _r is row major, we store each ri on each row.
             // So that r[i] = current residue for p[i].
@@ -117,48 +165,6 @@ namespace LinBox {
             // @todo Set up an RNS system
         }
 
-        // @fixme USELESS?
-        IVector& nextdigit(IVector& digit, const IVector& residu) const
-        {
-            // @fixme The residu can't be r, here!
-            // So the overall does a lot more job than it needs.
-            // See below for the solution.
-
-            // @fixme With this design, are we forced to CRT_Reconstruct each ci?
-            // Is this bad?
-            // If we don't want that, we need to not extent LiftingContainerBase,
-            // and reimplement some of the behavior.
-            // Because the only thing needed to user API (rational reconstruction)
-            // is bool next (IVector& digit) from iterator.
-
-            /*  for i = 1 .. l:
-             *  |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
-             *  |   ci = Bi Ri mod pi                   < Matrix-vector in Z/pZ
-             *  V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
-             *  for i = 1 .. l:
-             *  |   ri = Qi + (Vi / pi)
-             */
-
-            // @fixme Could be parallel!
-            // for (auto i = 0u; i < l; ++i) {
-            //     Hom<Ring, Field> hom(_ring, _F[i]);
-
-            //     // @fixme How to do euclidian division?
-            //     // ri = pi Qi + Ri
-
-            //     // @todo If R might already be a field element
-            //     _B[i]->apply(_c[i], hom.convert(_R[i]));
-
-            //     // @todo Convert _c[i] to RNS
-            // }
-
-            // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields?
-
-            // @fixme Compute the next residue!
-
-            return digit;
-        }
-
         // --------------------------
         // ----- LiftingContainer API
 
@@ -180,15 +186,9 @@ namespace LinBox {
         // ----- NOT LiftingContainer API
         // ----- but still needed
 
-		const IElement numbound() const
-		{
-			return _numbound;
-		}
+        const IElement numbound() const { return _numbound; }
 
-		const IElement denbound() const
-		{
-			return _denbound;
-		}
+        const IElement denbound() const { return _denbound; }
 
         // --------------
         // ----- Iterator
@@ -199,36 +199,65 @@ namespace LinBox {
          */
         class const_iterator {
         private:
-            BlasVector<Ring> _res;
             const MultiModLiftingContainer& _lc;
             size_t _position;
 
         public:
-            const_iterator(const MultiModLiftingContainer& lc, size_t end = 0)
+            const_iterator(const MultiModLiftingContainer& lc, size_t position = 0)
                 : _lc(lc)
-                , _position(end)
+                , _position(position)
             {
-                // @fixme Initialize _residue
+                // @fixme Initialize residue _r
             }
 
             /**
              * Returns false if the next digit cannot be computed (bad modulus).
+             * ci is a vector of integers but all elements are below p = p1 * ... * pl
              */
-            bool next(IVector& digit)
+            bool next(IVector& ci)
             {
-                // compute v2 = _matA * digit
-                IVector v2(_lc.ring(), _lc.size());
-                // @fixme _lc._MAD.applyV(v2, digit, _res);
-
-                // update _res -= v2
-                // @fixme _lc._VDR.subin(_res, v2);
-                typename BlasVector<Ring>::iterator p0;
-
-                // update _res = _res / p
-                int index = 0;
-                for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) {
-                    _lc.ring().divin(*p0, _lc._p);
-                }
+                /*  for i = 1 .. l:
+                 *  |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
+                 *  |   ci = Bi Ri mod pi                   < Matrix-vector in Z/pZ
+                 *  V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
+                 *  for i = 1 .. l:
+                 *  |   ri = Qi + (Vi / pi)
+                 */
+
+                std::cout << "ci: " << ci << std::endl;
+
+                // @fixme Could be parallel!
+                // for (auto i = 0u; i < l; ++i) {
+                //     Hom<Ring, Field> hom(_ring, _F[i]);
+
+                //     // @fixme How to do euclidian division?
+                //     // ri = pi Qi + Ri
+
+                //     // @todo If R might already be a field element
+                //     _B[i]->apply(_c[i], hom.convert(_R[i]));
+
+                //     // @todo Convert _c[i] to RNS
+                // }
+
+                // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields?
+
+                // @fixme Compute the next residue!
+
+                // @fixme @note For us, Aci is a matrix!
+
+                // // compute Aci = _matA * ci
+                // IVector Aci(_lc.ring(), _lc.size());
+                // // @fixme _lc._MAD.applyV(Aci, ci, _res);
+
+                // // update _res -= Aci
+                // // @fixme _lc._VDR.subin(_res, Aci);
+                // typename BlasVector<Ring>::iterator p0;
+
+                // // update _res = _res / p
+                // int index = 0;
+                // for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) {
+                //     _lc.ring().divin(*p0, _lc._p);
+                // }
 
                 // increase position of the iterator
                 ++_position;
@@ -251,21 +280,22 @@ namespace LinBox {
 
     private:
         const Ring& _ring;
-        Field _field;
 
         IElement _numbound;
         IElement _denbound;
 
-        IElement _p;
-        size_t _k; //< Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound)
-        size_t _n; //< Row/column dimension of A.
+        IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
+        std::vector<FElement> _primes; // @fixme We might want something else as a type!
+        size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
+        size_t _n; // Row/column dimension of A.
+        size_t _l; // How many primes. Equal to _primes.size().
 
         // @note r is a big matrix in ZZ holding all residues
-        IMatrix _r;
-        FMatrix _c;
-        std::vector<FMatrix> _B; // Inverses of A mod p[i]
-        std::vector<IVector> _Q;
-        std::vector<FVector> _R;
-        std::vector<Field> _F;
+        // IMatrix _r;
+        // FMatrix _c;
+        std::vector<std::unique_ptr<FMatrix>> _B; // Inverses of A mod p[i]
+        // std::vector<IVector> _Q;
+        // std::vector<FVector> _R;
+        std::vector<Field> _fields;
     };
 }

From 134c3cf5a3bf7a462b27c38fb7b8a6ab4f09628b Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 22 May 2019 16:30:29 +0200
Subject: [PATCH 10/63] More RNS dixon

---
 .../algorithms/multi-mod-lifting-container.h  | 30 +++++++++++--------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index a0c984179..4e2cd37f1 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -98,7 +98,7 @@ namespace LinBox {
             // Generating primes
             IElement iTmp;
             _ring.assign(_p, _ring.one);
-            for (auto i = 0u; i < _l; ++i) {
+            for (auto j = 0u; j < _l; ++j) {
                 // @fixme Ensure that all primes are different
                 // @fixme Take into account bestBitSize!
                 _primes.emplace_back(*primeGenerator);
@@ -224,20 +224,24 @@ namespace LinBox {
                  *  |   ri = Qi + (Vi / pi)
                  */
 
-                std::cout << "ci: " << ci << std::endl;
-
                 // @fixme Could be parallel!
-                // for (auto i = 0u; i < l; ++i) {
-                //     Hom<Ring, Field> hom(_ring, _F[i]);
-
-                //     // @fixme How to do euclidian division?
-                //     // ri = pi Qi + Ri
+                for (auto j = 0u; j < _l; ++j) {
+                    // @fixme How to do euclidian division?
+                    // ri = pi Qi + Ri
+
+                    // @todo If R might already be a field element
+                    // @cpernet!!!
+                    // @fixme We will probably need a low-level API
+                    // so that we can say that the j-th row of _ci takes
+                    // the result of B * R mod pj
+                    // _B[j]->apply(*_ci[j], *_R[j]);
+
+                    // @todo Convert _c[i] to RNS
+                }
 
-                //     // @todo If R might already be a field element
-                //     _B[i]->apply(_c[i], hom.convert(_R[i]));
+                // @fixme CRT reconstruct ci from (cij)
 
-                //     // @todo Convert _c[i] to RNS
-                // }
+                std::cout << "ci: " << ci << std::endl;
 
                 // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields?
 
@@ -292,7 +296,7 @@ namespace LinBox {
 
         // @note r is a big matrix in ZZ holding all residues
         // IMatrix _r;
-        // FMatrix _c;
+        FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row.
         std::vector<std::unique_ptr<FMatrix>> _B; // Inverses of A mod p[i]
         // std::vector<IVector> _Q;
         // std::vector<FVector> _R;

From 7644d6d6073e7357d77f291d720693b85a9f882f Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 22 May 2019 18:00:43 +0200
Subject: [PATCH 11/63] Fixed compilation of MultiModLiftingContainer

---
 .../algorithms/multi-mod-lifting-container.h  | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 4e2cd37f1..8f52f6ed9 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -106,7 +106,7 @@ namespace LinBox {
                 _ring.init(iTmp, _primes.back());
                 _ring.mulin(_p, iTmp);
 
-                std::cout << "primes[" << i << "]: " << Integer(_primes.back()) << std::endl;
+                std::cout << "primes[" << j << "]: " << Integer(_primes.back()) << std::endl;
 
                 ++primeGenerator;
             }
@@ -116,8 +116,8 @@ namespace LinBox {
             // Compute how many iterations are needed
             auto hb = RationalSolveHadamardBound(A, b);
             double pLog = Givaro::logtwo(_p);
-            _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound)
-                           / pLog); // log2(2 * N * D) / log2(p)
+            // _k = log2(2 * N * D) / log2(p)
+            _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog);
             std::cout << "k: " << _k << std::endl;
 
             // @fixme Fact is RationalReconstruction which needs numbound and denbound
@@ -132,20 +132,22 @@ namespace LinBox {
             // computation done there that is completely useless when using this container. Meaning
             // that we need a RNSDixonSolver<Dense>.
             {
+                _B.reserve(_l);
+
                 for (const auto& F : _fields) {
                     BlasMatrixDomain<Field> bmd(F);
-                    auto Bpi = std::make_unique<FMatrix>(F, _n, _n);
+                    _B.emplace_back(F, _n, _n);
+                    auto& Bpi = _B.back();
 
                     // @fixme Taken for rational-solver.inl. BETTER USE REBIND!!!
                     for (size_t i = 0; i < _n; ++i) {
                         for (size_t j = 0; j < _n; ++j) {
-                            F.init(Bpi->refEntry(i, j), A.getEntry(i, j));
+                            F.init(Bpi.refEntry(i, j), A.getEntry(i, j));
                         }
                     }
 
-                    bmd.invin(*Bpi);
-                    Bpi->write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl;
-                    _B.emplace_back(std::move(Bpi));
+                    bmd.invin(Bpi); // @fixme Use FFLAS directly, so that we can have a REAL in place inv.
+                    Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl;
                 }
             }
 
@@ -225,7 +227,7 @@ namespace LinBox {
                  */
 
                 // @fixme Could be parallel!
-                for (auto j = 0u; j < _l; ++j) {
+                for (auto j = 0u; j < _lc._l; ++j) {
                     // @fixme How to do euclidian division?
                     // ri = pi Qi + Ri
 
@@ -296,8 +298,8 @@ namespace LinBox {
 
         // @note r is a big matrix in ZZ holding all residues
         // IMatrix _r;
-        FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row.
-        std::vector<std::unique_ptr<FMatrix>> _B; // Inverses of A mod p[i]
+        // FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row.
+        std::vector<FMatrix> _B; // Inverses of A mod p[i]
         // std::vector<IVector> _Q;
         // std::vector<FVector> _R;
         std::vector<Field> _fields;

From 1ad2b15782fdd626f74bae0db61d73be652499e7 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Thu, 23 May 2019 14:10:37 +0200
Subject: [PATCH 12/63] RNSDixon Euclidean division and so on

---
 .../algorithms/multi-mod-lifting-container.h  | 119 ++++++++++++------
 linbox/algorithms/rns.h                       |   3 +
 linbox/algorithms/rns.inl                     |  16 ++-
 linbox/solutions/solve/solve-dixon-rns.h      |   5 +-
 tests/test-solve-full.C                       |   8 +-
 5 files changed, 110 insertions(+), 41 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 8f52f6ed9..d225be731 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -22,6 +22,7 @@
 
 #pragma once
 
+#include <linbox/algorithms/rns.h>
 #include <linbox/solutions/methods.h>
 
 namespace LinBox {
@@ -84,6 +85,8 @@ namespace LinBox {
         MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A,
                                  const IVector& b, const Method::DixonRNS& m)
             : _ring(ring)
+            , _A(A)
+            , _b(b)
             , _n(A.rowdim())
         {
             linbox_check(A.rowdim() == A.coldim());
@@ -91,7 +94,7 @@ namespace LinBox {
             A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
             std::cout << "b: " << b << std::endl;
 
-            // @fixme Have l = log(||A||) + log(n) or so
+            // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?)
             _l = 2;
             std::cout << "l: " << _l << std::endl;
 
@@ -111,6 +114,7 @@ namespace LinBox {
                 ++primeGenerator;
             }
 
+            _pRns.init(_primes);
             std::cout << "p: " << _p << std::endl;
 
             // Compute how many iterations are needed
@@ -126,11 +130,10 @@ namespace LinBox {
             _ring.init(_denbound, Integer(1) << static_cast<uint64_t>(std::ceil(hb.denLogBound)));
 
             // Initialize all inverses
-            // @fixme Somehow, the inverse mod p within DixonSolver<Dense> was already computed,
-            // and pass through to the lifting container. Here, we can't do that, because p is
-            // bigger than what DixonSolver<Dense> thought about it. So there might be a lot of
-            // computation done there that is completely useless when using this container. Meaning
-            // that we need a RNSDixonSolver<Dense>.
+            // @note An inverse mod some p within DixonSolver<Dense> was already computed,
+            // and pass through to the lifting container. Here, we could use that, but we have
+            // to keep control of generated primes, so that the RNS base has bigger primes
+            // than the .
             {
                 _B.reserve(_l);
 
@@ -146,25 +149,14 @@ namespace LinBox {
                         }
                     }
 
-                    bmd.invin(Bpi); // @fixme Use FFLAS directly, so that we can have a REAL in place inv.
-                    Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl;
+                    // @fixme @cpernet Use FFLAS directly, so that we can have a REAL in place inv.
+                    bmd.invin(Bpi);
+
+                    Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ",
+                              Tag::FileFormat::Maple)
+                        << std::endl;
                 }
             }
-
-            // @note As _r is row major, we store each ri on each row.
-            // So that r[i] = current residue for p[i].
-            // _r.init(_ring, l, b.size());
-            // for (auto i = 0u; i < l; ++i) {
-            //     // @fixme Is there a vector domain to copy to a matrix?
-            //     for (auto j = 0u; j < b.size(); ++j) {
-            //         _ring.assign(_r[i][j], b[j]);
-            //     }
-            // }
-
-            // @fixme Allocate Q and R
-            // @fixme Allocate c
-
-            // @todo Set up an RNS system
         }
 
         // --------------------------
@@ -180,7 +172,7 @@ namespace LinBox {
 
         /**
          * We are compliant to the interface even though
-         * p is multi-modular and thus not a prime.
+         * p is multi-modular and thus not a prime per se.
          */
         const IElement& prime() const final { return _p; }
 
@@ -202,6 +194,13 @@ namespace LinBox {
         class const_iterator {
         private:
             const MultiModLiftingContainer& _lc;
+            std::vector<IVector> _r; // @todo Could be a matrix? Might not be useful, as it is never
+                                     // used directly in computations.
+            std::vector<IVector> _Q;
+            std::vector<IVector> _R; // @fixme This one should be expressed in a RNS system q, and
+                                     // HAS TO BE A MATRIX for gemm.
+            std::vector<FVector>
+                _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later.
             size_t _position;
 
         public:
@@ -209,7 +208,27 @@ namespace LinBox {
                 : _lc(lc)
                 , _position(position)
             {
-                // @fixme Initialize reisdue _r
+                VectorDomain<Ring> VD(_lc._ring);
+
+                _r.reserve(_lc._l);
+                _Q.reserve(_lc._l);
+                _R.reserve(_lc._l);
+                _Fc.reserve(_lc._l);
+                for (auto j = 0u; j < _lc._l; ++j) {
+                    auto& F = _lc._fields[j];
+
+                    _r.emplace_back(_lc._ring, _lc._n);
+                    _Q.emplace_back(_lc._ring, _lc._n);
+                    _R.emplace_back(_lc._ring, _lc._n);
+                    _Fc.emplace_back(F, _lc._n);
+
+                    // Initialize all residues to b
+                    _r.back() = _lc._b; // Copying data
+                }
+
+                // @fixme Allocate c
+
+                // @todo Set up an RNS system
             }
 
             /**
@@ -218,6 +237,8 @@ namespace LinBox {
              */
             bool next(IVector& ci)
             {
+                std::cout << "----- NEXT" << std::endl;
+
                 /*  for i = 1 .. l:
                  *  |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
                  *  |   ci = Bi Ri mod pi                   < Matrix-vector in Z/pZ
@@ -226,22 +247,45 @@ namespace LinBox {
                  *  |   ri = Qi + (Vi / pi)
                  */
 
-                // @fixme Could be parallel!
+                // @fixme Should be done in parallel!
                 for (auto j = 0u; j < _lc._l; ++j) {
-                    // @fixme How to do euclidian division?
-                    // ri = pi Qi + Ri
+                    auto pj = _lc._primes[j];
+                    auto& r = _r[j];
+                    auto& Q = _Q[j];
+                    auto& R = _R[j];
+
+                    // @todo @cpernet Is there a VectorDomain::divmod somewhere?
+                    // Euclidian division so that rj = pj Qj + Rj
+                    for (auto i = 0u; i < _lc._n; ++i) {
+                        // @fixme @cpernet Is this OK for any Ring or should we be sure we are using
+                        // Integers?
+                        _lc._ring.quoRem(Q[i], R[i], r[i], pj);
+                    }
 
-                    // @todo If R might already be a field element
-                    // @cpernet!!!
-                    // @fixme We will probably need a low-level API
-                    // so that we can say that the j-th row of _ci takes
-                    // the result of B * R mod pj
-                    // _B[j]->apply(*_ci[j], *_R[j]);
+                    std::cout << "--- FOR " << Integer(pj) << std::endl;
+                    std::cout << "r: " << r << std::endl;
+                    std::cout << "Q: " << Q << std::endl;
+                    std::cout << "R: " << R << std::endl;
+
+                    // Convert R to the field
+                    // @fixme @cpernet Could this step be ignored?
+                    // If not, put that in already allocated memory, and not use a temporary here.
+                    auto& F = _lc._fields[j];
+                    FVector FR(F, R); // rebind
+
+                    auto& B = _lc._B[j];
+                    auto& Fc = _Fc[j];
+                    B.apply(Fc, FR);
+
+                    std::cout << "Fc: " << Fc << std::endl;
 
                     // @todo Convert _c[i] to RNS
                 }
 
                 // @fixme CRT reconstruct ci from (cij)
+                // @cpernet Is that what I should use? I tweaked it so that I can use it.
+                // _lc._pRns.cra(ci, _Fc); // @fixme This cra function should be called reconstruct or such.
+                // @fixme Better use Givaro::RNSSystem?
 
                 std::cout << "ci: " << ci << std::endl;
 
@@ -287,11 +331,16 @@ namespace LinBox {
     private:
         const Ring& _ring;
 
+        // The problem: A^{-1} * b
+        const IMatrix& _A;
+        const IVector& _b;
+
         IElement _numbound;
         IElement _denbound;
 
         IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
+        RNS<false> _pRns;              // RNS system for primes
         size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
         size_t _n; // Row/column dimension of A.
         size_t _l; // How many primes. Equal to _primes.size().
@@ -302,6 +351,6 @@ namespace LinBox {
         std::vector<FMatrix> _B; // Inverses of A mod p[i]
         // std::vector<IVector> _Q;
         // std::vector<FVector> _R;
-        std::vector<Field> _fields;
+        std::vector<Field> _fields; // All fields Modular<p[i]>
     };
 }
diff --git a/linbox/algorithms/rns.h b/linbox/algorithms/rns.h
index 2a7fffa16..c2214abc4 100644
--- a/linbox/algorithms/rns.h
+++ b/linbox/algorithms/rns.h
@@ -78,6 +78,7 @@ namespace LinBox
 		 * @param l  max recoverable bits
 		 * @param ps bitsize of the primes (defaulting to 21 because...)
 		 */
+		RNS() {}
 		RNS(size_t l, size_t ps=21) ;
 		/*x Create a RNS with given primes.
 		 * @param primes given basis of primes
@@ -97,6 +98,8 @@ namespace LinBox
 		/*! Inits cra.
 		 */
 		void initCRA() ;
+		template <class T>
+		void init(const std::vector<T>& primes);
 		/*! Computes \c result corresponding to the \c residues.
 		 *
 		 */
diff --git a/linbox/algorithms/rns.inl b/linbox/algorithms/rns.inl
index b38a227eb..5a8ce69af 100644
--- a/linbox/algorithms/rns.inl
+++ b/linbox/algorithms/rns.inl
@@ -56,7 +56,7 @@ namespace LinBox
 				if (curint>maxint)
 					break;
 				PrimeIterator<IteratorCategories::HeuristicTag> genprimes( (unsigned int) (_ps_+penalty) );
-				size_t p = genprimes.randomPrime() ;
+				size_t p = *genprimes ;
 				++genprimes;
 				primeset.insert(p);
 				if (lg < primeset.size()) {
@@ -104,6 +104,18 @@ namespace LinBox
 		return ;
 	}
 
+	template <bool Unsigned>
+	template <class T>
+	void RNS<Unsigned>::init(const std::vector<T>& primes)
+	{
+		_primes_.resize(primes.size());
+		_PrimeDoms_.resize(primes.size());
+		for (auto i = 0u; i < primes.size(); ++i) {
+			_primes_[i] = size_t(primes[i]);
+			_PrimeDoms_[i] = Field(primes[i]);
+		}
+	}
+
 	template<bool Unsigned>
 	void
 	RNS<Unsigned>::cra(integer & result, const std::vector<double> & residues)
@@ -183,7 +195,7 @@ namespace LinBox
 				if (curint>maxint)
 					break;
 				PrimeIterator<IteratorCategories::HeuristicTag> genprimes((unsigned int) (_ps_+penalty) );
-				size_t p = genprimes.randomPrime() ;
+				size_t p = *genprimes ;
 				++genprimes;
 				primeset.insert(p);
 				if (lg < primeset.size()) {
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index c62e1252c..1c8c6c306 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -70,7 +70,10 @@ namespace LinBox {
     {
         commentator().start("solve.dixon.integer.dense");
 
-        using Field = Givaro::ModularBalanced<double>;
+        // @fixme We don't know if we can use ModularBalanced<double>,
+        // because of the rational reconstruction which might be
+        // implicitly requiring 0-{p-1} representation of the p-adic sequence elements.
+        using Field = Givaro::Modular<double>;
         using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
         PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
 
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index 4caf811bf..e9ad202ca 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -139,10 +139,12 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R
     bool ok = true;
     try {
         solve(x, A, b, method);
-        ok = ok && check_result<SolveMethod>(x, A, b, RA, Rb);
+        ok = check_result<SolveMethod>(x, A, b, RA, Rb);
 
-        solveInPlace(x, A, b, method);
-        ok = ok && check_result<SolveMethod>(x, A, b, RA, Rb);
+        if (ok) {
+            solveInPlace(x, A, b, method);
+            ok = check_result<SolveMethod>(x, A, b, RA, Rb);
+        }
     } catch (...) {
         print_error<SolveMethod>(x, A, b, "throws error");
         return false;

From a081d4cee095659fc9443fdc51e3cb45392c3227 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Thu, 23 May 2019 14:59:14 +0200
Subject: [PATCH 13/63] Working RNS Dixon on very small bitsize

---
 linbox/algorithms/lifting-container.h         |  2 +-
 .../algorithms/multi-mod-lifting-container.h  | 90 +++++++++++--------
 linbox/algorithms/rns.h                       | 11 +--
 linbox/algorithms/rns.inl                     |  9 +-
 linbox/solutions/solve/solve-dixon-rns.h      |  3 +
 5 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h
index 7735c9f89..81c992bc5 100644
--- a/linbox/algorithms/lifting-container.h
+++ b/linbox/algorithms/lifting-container.h
@@ -224,7 +224,7 @@ namespace LinBox
 
 				// compute v2 = _matA * digit
 				IVector v2 (_lc.ring(),_lc._matA.rowdim());
-				_lc._MAD.applyV(v2,digit, _res);
+				_lc._MAD.applyV(v2,digit, _res); // @fixme This third parameter makes no sense!
 
 #ifdef DEBUG_LC
 
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index d225be731..c67a290f8 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -114,7 +114,6 @@ namespace LinBox {
                 ++primeGenerator;
             }
 
-            _pRns.init(_primes);
             std::cout << "p: " << _p << std::endl;
 
             // Compute how many iterations are needed
@@ -203,12 +202,17 @@ namespace LinBox {
                 _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later.
             size_t _position;
 
+            // @fixme Better use Givaro::RNSSystem?
+            RNS<true> _pRns; // RNS system for primes
+
         public:
             const_iterator(const MultiModLiftingContainer& lc, size_t position = 0)
                 : _lc(lc)
                 , _position(position)
             {
-                VectorDomain<Ring> VD(_lc._ring);
+                VectorDomain<Ring> IVD(_lc._ring);
+
+                _pRns.init(_lc._primes);
 
                 _r.reserve(_lc._l);
                 _Q.reserve(_lc._l);
@@ -233,19 +237,13 @@ namespace LinBox {
 
             /**
              * Returns false if the next digit cannot be computed (bad modulus).
-             * ci is a vector of integers but all element are below p = p1 * ... * pl
+             * c is a vector of integers but all element are below p = p1 * ... * pl
              */
-            bool next(IVector& ci)
+            bool next(IVector& c)
             {
                 std::cout << "----- NEXT" << std::endl;
 
-                /*  for i = 1 .. l:
-                 *  |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
-                 *  |   ci = Bi Ri mod pi                   < Matrix-vector in Z/pZ
-                 *  V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
-                 *  for i = 1 .. l:
-                 *  |   ri = Qi + (Vi / pi)
-                 */
+                VectorDomain<Ring> IVD(_lc._ring);
 
                 // @fixme Should be done in parallel!
                 for (auto j = 0u; j < _lc._l; ++j) {
@@ -282,34 +280,56 @@ namespace LinBox {
                     // @todo Convert _c[i] to RNS
                 }
 
-                // @fixme CRT reconstruct ci from (cij)
-                // @cpernet Is that what I should use? I tweaked it so that I can use it.
-                // _lc._pRns.cra(ci, _Fc); // @fixme This cra function should be called reconstruct or such.
-                // @fixme Better use Givaro::RNSSystem?
+                // ----- CRT reconstruct c from (cj)
 
-                std::cout << "ci: " << ci << std::endl;
+                std::cout << "--- CRT reconstruction" << std::endl;
 
-                // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields?
+                // @cpernet Is that RNS system what I should use? I tweaked it so that I can use it.
+                std::vector<FElement> fElements(_lc._l);
+                for (auto i = 0u; i < _lc._n; ++i) {
+                    for (auto j = 0u; j < _lc._l; ++j) {
+                        fElements[j] = _Fc[j][i];
+                    }
+                    // @fixme This cra function should be called reconstruct or such.
+                    _pRns.cra(c[i], fElements);
+                }
 
-                // @fixme Compute the next residue!
+                std::cout << "c: " << c << std::endl;
 
-                // @fixme @note For us, Aci is a matrix!
+                // ----- Compute the next residue!
 
-                // // compute Aci = _matA * ci
-                // IVector Aci(_lc.ring(), _lc.size());
-                // // @fixme _lc._MAD.applyV(Aci, ci, _res);
+                std::cout << "--- Residue update" << std::endl;
 
-                // // update _res -= Aci
-                // // @fixme _lc._VDR.subin(_res, Aci);
-                // typename BlasVector<Ring>::iterator p0;
+                // @note This is a dummy implementation, for now.
 
-                // // update _res = _res / p
-                // int index = 0;
-                // for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) {
-                //     _lc.ring().divin(*p0, _lc._p);
-                // }
+                // r <= (rj - A c) / pj
+                for (auto j = 0u; j < _lc._l; ++j) {
+                    auto pj = _lc._primes[j];
+                    auto& r = _r[j];
+                    auto& Q = _Q[j];
+                    auto& R = _R[j];
+
+                    auto& Fc = _Fc[j];
+                    // @fixme For now, we convert cj to integer,
+                    // but it should be converted into a RNS system, on pre-allocated memory.
+                    IVector Ic(_lc._ring, Fc);
+
+                    // @fixme Should become a matrix-matrix multiplication!
+                    // @fixme Should be able to do a gemv
+                    _lc._A.apply(r, Ic); // r = A c
+                    IVD.negin(r);        // r = - A c
+                    IVD.addin(r, R);     // r = R - A c
+
+                    // r = (R - A c) / pj
+                    IElement Ipj;
+                    _lc._ring.init(Ipj, pj);
+                    for (auto i = 0u; i < _lc._n; ++i) {
+                        _lc._ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain?
+                    }
+
+                    IVD.addin(r, Q); // r = Q + (R - A c) / pj
+                }
 
-                // increase position of the iterator
                 ++_position;
                 return true;
             }
@@ -340,17 +360,11 @@ namespace LinBox {
 
         IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
-        RNS<false> _pRns;              // RNS system for primes
         size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
         size_t _n; // Row/column dimension of A.
         size_t _l; // How many primes. Equal to _primes.size().
 
-        // @note r is a big matrix in ZZ holding all residues
-        // IMatrix _r;
-        // FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row.
-        std::vector<FMatrix> _B; // Inverses of A mod p[i]
-        // std::vector<IVector> _Q;
-        // std::vector<FVector> _R;
+        std::vector<FMatrix> _B;    // Inverses of A mod p[i]
         std::vector<Field> _fields; // All fields Modular<p[i]>
     };
 }
diff --git a/linbox/algorithms/rns.h b/linbox/algorithms/rns.h
index c2214abc4..0a966acf1 100644
--- a/linbox/algorithms/rns.h
+++ b/linbox/algorithms/rns.h
@@ -100,14 +100,11 @@ namespace LinBox
 		void initCRA() ;
 		template <class T>
 		void init(const std::vector<T>& primes);
+
 		/*! Computes \c result corresponding to the \c residues.
 		 *
 		 */
 		void cra(integer & result, const std::vector<double> & residues);
-		/*! Computes \c result corresponding to the \c residues.
-		 *
-		 */
-		void cra(std::vector<integer> & result, const std::vector<std::vector<double> > & residues);
 
 		/*! Computes \c result corresponding to the iteration.
 		 *
@@ -115,12 +112,6 @@ namespace LinBox
 		template<typename Iteration>
 		void cra(Ivect & result, Iteration & iter) ;
 
-		template<class Tinteger, class Tresidue>
-		void cra(Tinteger & result, Tresidue & residues);
-
-		template<class Tinteger, class Tresidue>
-		void convert(Tinteger & result, Tresidue & residues) ;
-
 		// mixed radix
 	};
 
diff --git a/linbox/algorithms/rns.inl b/linbox/algorithms/rns.inl
index 5a8ce69af..1723f94c9 100644
--- a/linbox/algorithms/rns.inl
+++ b/linbox/algorithms/rns.inl
@@ -108,12 +108,15 @@ namespace LinBox
 	template <class T>
 	void RNS<Unsigned>::init(const std::vector<T>& primes)
 	{
-		_primes_.resize(primes.size());
-		_PrimeDoms_.resize(primes.size());
-		for (auto i = 0u; i < primes.size(); ++i) {
+		_size_ = primes.size();
+		_primes_.resize(_size_);
+		_PrimeDoms_.resize(_size_);
+		for (auto i = 0u; i < _size_; ++i) {
 			_primes_[i] = size_t(primes[i]);
 			_PrimeDoms_[i] = Field(primes[i]);
 		}
+
+		_CRT_ = CRTSystem(_PrimeDoms_);
 	}
 
 	template<bool Unsigned>
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 1c8c6c306..e056ffbbd 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -80,6 +80,9 @@ namespace LinBox {
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);
 
+        std::cout << "FOUND xNum: " << xNum << std::endl;
+        std::cout << "FOUND xDen: " << xDen << std::endl;
+
         commentator().stop("solve.dixon.integer.dense");
 
         // @fixme Implement something like that

From 3b200440eb23a1b201b41b6e8ff0f7cdb44bb341 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 28 May 2019 10:15:17 +0200
Subject: [PATCH 14/63] Quality of life for debugging

---
 .../algorithms/multi-mod-lifting-container.h  | 27 ++++++++++---------
 linbox/solutions/solve/solve-dixon-rns.h      |  4 ++-
 tests/test-solve-full.C                       |  2 +-
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index c67a290f8..b70ff7fdb 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -102,16 +102,22 @@ namespace LinBox {
             IElement iTmp;
             _ring.assign(_p, _ring.one);
             for (auto j = 0u; j < _l; ++j) {
-                // @fixme Ensure that all primes are different
-                // @fixme Take into account bestBitSize!
-                _primes.emplace_back(*primeGenerator);
-                _fields.emplace_back(_primes.back());
-                _ring.init(iTmp, _primes.back());
+                auto pj = *primeGenerator;
+                ++primeGenerator;
+
+                // Ensure that all primes are different
+                if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) {
+                    j -= 1;
+                    continue;
+                }
+
+                _primes.emplace_back(pj);
+                _fields.emplace_back(pj);
+                _ring.init(iTmp, pj);
                 _ring.mulin(_p, iTmp);
 
-                std::cout << "primes[" << j << "]: " << Integer(_primes.back()) << std::endl;
+                std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl;
 
-                ++primeGenerator;
             }
 
             std::cout << "p: " << _p << std::endl;
@@ -150,10 +156,6 @@ namespace LinBox {
 
                     // @fixme @cpernet Use FFLAS directly, so that we can have a REAL in place inv.
                     bmd.invin(Bpi);
-
-                    Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ",
-                              Tag::FileFormat::Maple)
-                        << std::endl;
                 }
             }
         }
@@ -252,6 +254,8 @@ namespace LinBox {
                     auto& Q = _Q[j];
                     auto& R = _R[j];
 
+                    std::cout << "--- FOR " << Integer(pj) << std::endl;
+
                     // @todo @cpernet Is there a VectorDomain::divmod somewhere?
                     // Euclidian division so that rj = pj Qj + Rj
                     for (auto i = 0u; i < _lc._n; ++i) {
@@ -260,7 +264,6 @@ namespace LinBox {
                         _lc._ring.quoRem(Q[i], R[i], r[i], pj);
                     }
 
-                    std::cout << "--- FOR " << Integer(pj) << std::endl;
                     std::cout << "r: " << r << std::endl;
                     std::cout << "Q: " << Q << std::endl;
                     std::cout << "R: " << R << std::endl;
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index e056ffbbd..0c998eb8b 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -75,7 +75,9 @@ namespace LinBox {
         // implicitly requiring 0-{p-1} representation of the p-adic sequence elements.
         using Field = Givaro::Modular<double>;
         using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
-        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
+        // PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
+        // @fixme This is for debug!
+        PrimeGenerator primeGenerator(3);
 
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index e9ad202ca..54ea8c083 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -263,7 +263,7 @@ int main(int argc, char** argv)
     bool ok = true;
     do {
         // // ----- Rational Auto
-        // ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // // @fixme Dixon<Wiedemann> does not compile
         // // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);

From 505f896ae132cd94b39deb340fdfb7398a08f6a3 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 28 May 2019 16:17:52 +0200
Subject: [PATCH 15/63] Started MultiModRationalReconstruction

---
 .../algorithms/multi-mod-lifting-container.h  | 256 +++++++-----------
 linbox/solutions/solve/solve-dixon-rns.h      |  61 ++++-
 2 files changed, 164 insertions(+), 153 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index b70ff7fdb..4b348cf1b 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -95,13 +95,13 @@ namespace LinBox {
             std::cout << "b: " << b << std::endl;
 
             // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?)
-            _l = 2;
-            std::cout << "l: " << _l << std::endl;
+            _primesCount = 2;
+            std::cout << "l: " << _primesCount << std::endl;
 
             // Generating primes
             IElement iTmp;
             _ring.assign(_p, _ring.one);
-            for (auto j = 0u; j < _l; ++j) {
+            for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = *primeGenerator;
                 ++primeGenerator;
 
@@ -117,7 +117,6 @@ namespace LinBox {
                 _ring.mulin(_p, iTmp);
 
                 std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl;
-
             }
 
             std::cout << "p: " << _p << std::endl;
@@ -125,9 +124,9 @@ namespace LinBox {
             // Compute how many iterations are needed
             auto hb = RationalSolveHadamardBound(A, b);
             double pLog = Givaro::logtwo(_p);
-            // _k = log2(2 * N * D) / log2(p)
-            _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog);
-            std::cout << "k: " << _k << std::endl;
+            // _iterationsCount = log2(2 * N * D) / log2(p)
+            _iterationsCount = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog);
+            std::cout << "k: " << _iterationsCount << std::endl;
 
             // @fixme Fact is RationalReconstruction which needs numbound and denbound
             // expects them to be in non-log...
@@ -140,7 +139,7 @@ namespace LinBox {
             // to keep control of generated primes, so that the RNS base has bigger primes
             // than the .
             {
-                _B.reserve(_l);
+                _B.reserve(_primesCount);
 
                 for (const auto& F : _fields) {
                     BlasMatrixDomain<Field> bmd(F);
@@ -158,6 +157,24 @@ namespace LinBox {
                     bmd.invin(Bpi);
                 }
             }
+
+            //----- Iteration
+
+            _r.reserve(_primesCount);
+            _Q.reserve(_primesCount);
+            _R.reserve(_primesCount);
+            _Fc.reserve(_primesCount);
+            for (auto j = 0u; j < _primesCount; ++j) {
+                auto& F = _fields[j];
+
+                _r.emplace_back(_ring, _n);
+                _Q.emplace_back(_ring, _n);
+                _R.emplace_back(_ring, _n);
+                _Fc.emplace_back(F, _n);
+
+                // Initialize all residues to b
+                _r.back() = _b; // Copying data
+            }
         }
 
         // --------------------------
@@ -166,7 +183,7 @@ namespace LinBox {
         const Ring& ring() const final { return _ring; }
 
         /// The length of the container.
-        size_t length() const final { return _k; }
+        size_t length() const final { return _iterationsCount; }
 
         /// The dimension of the problem/solution.
         size_t size() const final { return _n; }
@@ -185,171 +202,97 @@ namespace LinBox {
 
         const IElement denbound() const { return _denbound; }
 
+        uint32_t primesCount() const { return _primesCount; }
+
+        const FElement& prime(uint32_t index) const { return _primes.at(index); }
+
         // --------------
         // ----- Iterator
 
         /**
-         * Needed API for rational reconstruction.
-         * Each call to next() will update
+         * Returns false if the next digit cannot be computed (bad modulus).
+         * c is a vector of integers but all element are below p = p1 * ... * pl
          */
-        class const_iterator {
-        private:
-            const MultiModLiftingContainer& _lc;
-            std::vector<IVector> _r; // @todo Could be a matrix? Might not be useful, as it is never
-                                     // used directly in computations.
-            std::vector<IVector> _Q;
-            std::vector<IVector> _R; // @fixme This one should be expressed in a RNS system q, and
-                                     // HAS TO BE A MATRIX for gemm.
-            std::vector<FVector>
-                _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later.
-            size_t _position;
-
-            // @fixme Better use Givaro::RNSSystem?
-            RNS<true> _pRns; // RNS system for primes
-
-        public:
-            const_iterator(const MultiModLiftingContainer& lc, size_t position = 0)
-                : _lc(lc)
-                , _position(position)
-            {
-                VectorDomain<Ring> IVD(_lc._ring);
-
-                _pRns.init(_lc._primes);
-
-                _r.reserve(_lc._l);
-                _Q.reserve(_lc._l);
-                _R.reserve(_lc._l);
-                _Fc.reserve(_lc._l);
-                for (auto j = 0u; j < _lc._l; ++j) {
-                    auto& F = _lc._fields[j];
-
-                    _r.emplace_back(_lc._ring, _lc._n);
-                    _Q.emplace_back(_lc._ring, _lc._n);
-                    _R.emplace_back(_lc._ring, _lc._n);
-                    _Fc.emplace_back(F, _lc._n);
-
-                    // Initialize all residues to b
-                    _r.back() = _lc._b; // Copying data
-                }
-
-                // @fixme Allocate c
-
-                // @todo Set up an RNS system
-            }
-
-            /**
-             * Returns false if the next digit cannot be computed (bad modulus).
-             * c is a vector of integers but all element are below p = p1 * ... * pl
-             */
-            bool next(IVector& c)
-            {
-                std::cout << "----- NEXT" << std::endl;
-
-                VectorDomain<Ring> IVD(_lc._ring);
-
-                // @fixme Should be done in parallel!
-                for (auto j = 0u; j < _lc._l; ++j) {
-                    auto pj = _lc._primes[j];
-                    auto& r = _r[j];
-                    auto& Q = _Q[j];
-                    auto& R = _R[j];
-
-                    std::cout << "--- FOR " << Integer(pj) << std::endl;
-
-                    // @todo @cpernet Is there a VectorDomain::divmod somewhere?
-                    // Euclidian division so that rj = pj Qj + Rj
-                    for (auto i = 0u; i < _lc._n; ++i) {
-                        // @fixme @cpernet Is this OK for any Ring or should we be sure we are using
-                        // Integers?
-                        _lc._ring.quoRem(Q[i], R[i], r[i], pj);
-                    }
-
-                    std::cout << "r: " << r << std::endl;
-                    std::cout << "Q: " << Q << std::endl;
-                    std::cout << "R: " << R << std::endl;
+        bool next(std::vector<IVector>& digits)
+        {
+            std::cout << "----- NEXT" << std::endl;
 
-                    // Convert R to the field
-                    // @fixme @cpernet Could this step be ignored?
-                    // If not, put that in already allocated memory, and not use a temporary here.
-                    auto& F = _lc._fields[j];
-                    FVector FR(F, R); // rebind
+            VectorDomain<Ring> IVD(_ring);
 
-                    auto& B = _lc._B[j];
-                    auto& Fc = _Fc[j];
-                    B.apply(Fc, FR);
+            // @fixme Should be done in parallel!
+            for (auto j = 0u; j < _primesCount; ++j) {
+                auto pj = _primes[j];
+                auto& r = _r[j];
+                auto& Q = _Q[j];
+                auto& R = _R[j];
 
-                    std::cout << "Fc: " << Fc << std::endl;
+                std::cout << "--- FOR " << Integer(pj) << std::endl;
 
-                    // @todo Convert _c[i] to RNS
+                // @todo @cpernet Is there a VectorDomain::divmod somewhere?
+                // Euclidian division so that rj = pj Qj + Rj
+                for (auto i = 0u; i < _n; ++i) {
+                    // @fixme @cpernet Is this OK for any Ring or should we be sure we are using
+                    // Integers?
+                    _ring.quoRem(Q[i], R[i], r[i], pj);
                 }
 
-                // ----- CRT reconstruct c from (cj)
+                std::cout << "r: " << r << std::endl;
+                std::cout << "Q: " << Q << std::endl;
+                std::cout << "R: " << R << std::endl;
 
-                std::cout << "--- CRT reconstruction" << std::endl;
+                // Convert R to the field
+                // @fixme @cpernet Could this step be ignored?
+                // If not, put that in already allocated memory, and not use a temporary here.
+                auto& F = _fields[j];
+                FVector FR(F, R); // rebind
 
-                // @cpernet Is that RNS system what I should use? I tweaked it so that I can use it.
-                std::vector<FElement> fElements(_lc._l);
-                for (auto i = 0u; i < _lc._n; ++i) {
-                    for (auto j = 0u; j < _lc._l; ++j) {
-                        fElements[j] = _Fc[j][i];
-                    }
-                    // @fixme This cra function should be called reconstruct or such.
-                    _pRns.cra(c[i], fElements);
-                }
+                auto& B = _B[j];
+                auto& Fc = _Fc[j];
+                B.apply(Fc, FR);
 
-                std::cout << "c: " << c << std::endl;
+                std::cout << "Fc: " << Fc << std::endl;
 
-                // ----- Compute the next residue!
+                // @todo Convert _c[i] to RNS
+                digits[j] = IVector(_ring, Fc);
+            }
 
-                std::cout << "--- Residue update" << std::endl;
+            // ----- Compute the next residue!
 
-                // @note This is a dummy implementation, for now.
+            std::cout << "--- Residue update" << std::endl;
 
-                // r <= (rj - A c) / pj
-                for (auto j = 0u; j < _lc._l; ++j) {
-                    auto pj = _lc._primes[j];
-                    auto& r = _r[j];
-                    auto& Q = _Q[j];
-                    auto& R = _R[j];
+            // @note This is a dummy implementation, for now.
 
-                    auto& Fc = _Fc[j];
-                    // @fixme For now, we convert cj to integer,
-                    // but it should be converted into a RNS system, on pre-allocated memory.
-                    IVector Ic(_lc._ring, Fc);
+            // r <= (r - A c) / p
+            for (auto j = 0u; j < _primesCount; ++j) {
+                auto pj = _primes[j];
+                auto& r = _r[j]; // @fixme THEY HOLD ALL THE VERY SAME VALUE!
+                auto& Q = _Q[j];
+                auto& R = _R[j];
 
-                    // @fixme Should become a matrix-matrix multiplication!
-                    // @fixme Should be able to do a gemv
-                    _lc._A.apply(r, Ic); // r = A c
-                    IVD.negin(r);        // r = - A c
-                    IVD.addin(r, R);     // r = R - A c
+                auto& Fc = _Fc[j];
+                // @fixme For now, we convert cj to integer,
+                // but it should be converted into a RNS system, on pre-allocated memory.
+                IVector Ic(_ring, Fc);
 
-                    // r = (R - A c) / pj
-                    IElement Ipj;
-                    _lc._ring.init(Ipj, pj);
-                    for (auto i = 0u; i < _lc._n; ++i) {
-                        _lc._ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain?
-                    }
+                // @fixme Should become a matrix-matrix multiplication!
+                // @fixme Should be able to do a gemv
+                _A.apply(r, Ic); // r = A c
+                IVD.negin(r);    // r = - A c
+                IVD.addin(r, R); // r = R - A c
 
-                    IVD.addin(r, Q); // r = Q + (R - A c) / pj
+                // r = (R - A c) / p
+                IElement Ipj;
+                _ring.init(Ipj, pj);
+                for (auto i = 0u; i < _n; ++i) {
+                    _ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain?
                 }
 
-                ++_position;
-                return true;
+                IVD.addin(r, Q); // r = Q + (R - A c) / p
             }
 
-            bool operator!=(const const_iterator& iterator) const
-            {
-                return _position != iterator._position;
-            }
-
-            bool operator==(const const_iterator& iterator) const
-            {
-                return _position == iterator._position;
-            }
-        };
-
-        const_iterator begin() const { return const_iterator(*this); }
-        const_iterator end() const { return const_iterator(*this, _k); }
+            ++_position;
+            return true;
+        }
 
     private:
         const Ring& _ring;
@@ -363,11 +306,22 @@ namespace LinBox {
 
         IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
-        size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
-        size_t _n; // Row/column dimension of A.
-        size_t _l; // How many primes. Equal to _primes.size().
+        size_t
+            _iterationsCount; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
+        size_t _n;            // Row/column dimension of A.
+        size_t _primesCount;  // How many primes. Equal to _primes.size().
 
         std::vector<FMatrix> _B;    // Inverses of A mod p[i]
         std::vector<Field> _fields; // All fields Modular<p[i]>
+
+        //----- Iteration
+        std::vector<IVector> _r; // @todo Could be a matrix? Might not be useful, as it is never
+                                 // used directly in computations.
+        std::vector<IVector> _Q;
+        std::vector<IVector> _R; // @fixme This one should be expressed in a RNS system q, and
+                                 // HAS TO BE A MATRIX for gemm.
+        std::vector<FVector>
+            _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later.
+        size_t _position;
     };
 }
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 0c998eb8b..85140dc44 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -25,6 +25,63 @@
 #include <linbox/algorithms/multi-mod-lifting-container.h>
 
 namespace LinBox {
+    /**
+     * From a MultiModLiftingContainer, will build
+     * the solution on each prime, then will do a CRT reconstruction,
+     * before reconstructing the rational.
+     *
+     * This does not do early termination.
+     */
+    template <class LiftingContainer>
+    class MultiModRationalReconstruction {
+        using Ring = typename LiftingContainer::Ring;
+        using IElement = typename LiftingContainer::IElement;
+        using IVector = typename LiftingContainer::IVector;
+
+    public:
+        MultiModRationalReconstruction(LiftingContainer& lc)
+            : _lc(lc)
+        {
+        }
+
+        bool getRational(IVector& xNum, IElement& xDen) {
+            VectorDomain<Ring> IVD(_lc.ring());
+
+            // Stores each c0 + c1 pj + ... + ck pj^k for each pj
+            std::vector<IVector> padicAccumulations(_lc.primesCount(), _lc.ring());
+            // Temporary structure to store a ci for each pj
+            std::vector<IVector> digits(_lc.primesCount(), _lc.ring()); // @fixme Could be a Field Element?
+            // The pj^i for each pj
+            std::vector<IElement> radices(_lc.primesCount(), 1);
+
+            for (auto j = 0u; j < _lc.primesCount(); ++j) {
+                padicAccumulations[j].resize(_lc.size());
+                digits[j].resize(_lc.size());
+            }
+
+            // @fixme IMPLEMENT Rat Recon
+            for (auto i = 0u; i < _lc.length(); ++i) {
+                _lc.next(digits);
+
+                // @fixme Better use PolEval (except memory explosion?)
+                for (auto j = 0u; j < _lc.primesCount(); ++j) {
+                    std::cout << "STEP " << i << " DIGITS " << digits[j] << std::endl;
+                    IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
+                    _lc.ring().mulin(radices[j], _lc.prime(j));
+                    std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j] << std::endl;
+                }
+            }
+
+            // @fixme From here padicAccumulations are all right, we should CRT reconstruct that
+
+
+            return true;
+        }
+
+    private:
+        LiftingContainer& _lc;
+    };
+
     // @fixme Move that to a file - and make it be a RationalSolver<Method::DixonRNS>
     template <class Field, class Ring, class PrimeGenerator>
     class DixonRNSSolver {
@@ -49,9 +106,9 @@ namespace LinBox {
 
             using LiftingContainer = MultiModLiftingContainer<Field, Ring, PrimeGenerator>;
             LiftingContainer lc(_ring, _primeGenerator, A, b, m);
-            RationalReconstruction<LiftingContainer> re(lc);
+            MultiModRationalReconstruction<LiftingContainer> re(lc);
 
-            if (!re.getRational(xNum, xDen, 0)) {
+            if (!re.getRational(xNum, xDen)) {
                 std::cerr << "OUCH!" << std::endl;
             }
         }

From e0c0feb65ab3dfc1b1102a547b692820096e768c Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Wed, 29 May 2019 10:35:52 +0200
Subject: [PATCH 16/63] Quick WIP commit

---
 linbox/solutions/solve/solve-dixon-rns.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 85140dc44..da1719f13 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -44,7 +44,8 @@ namespace LinBox {
         {
         }
 
-        bool getRational(IVector& xNum, IElement& xDen) {
+        bool getRational(IVector& xNum, IElement& xDen)
+        {
             VectorDomain<Ring> IVD(_lc.ring());
 
             // Stores each c0 + c1 pj + ... + ck pj^k for each pj
@@ -59,7 +60,6 @@ namespace LinBox {
                 digits[j].resize(_lc.size());
             }
 
-            // @fixme IMPLEMENT Rat Recon
             for (auto i = 0u; i < _lc.length(); ++i) {
                 _lc.next(digits);
 
@@ -73,7 +73,10 @@ namespace LinBox {
             }
 
             // @fixme From here padicAccumulations are all right, we should CRT reconstruct that
+            using CRAField = Modular<Integer>;
+            ChineseRemainder<CRABuilderFullMultip<CRAField>> cra();
 
+            // @fixme Rat Recon
 
             return true;
         }
@@ -97,8 +100,8 @@ namespace LinBox {
          * Dense solving.
          */
         template <class RVector, class Vector>
-        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
-                   const Vector& b, const Method::DixonRNS& m)
+        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
+                   const Method::DixonRNS& m)
         {
             // @fixme We should use some code from DixonSolver...
             // But that's hard so we just assume that A is square and invertible.
@@ -122,8 +125,8 @@ namespace LinBox {
      * \brief Solve specialisation for DixonRNS on dense matrices.
      */
     template <class RVector, class Ring, class Vector>
-    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
-               const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
+    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
+               const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
     {
         commentator().start("solve.dixon.integer.dense");
 

From d2857368805d13ce4734edbc013fb3121882a8b2 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 29 May 2019 11:57:41 +0200
Subject: [PATCH 17/63] Sometimes working, sometimes failing

---
 .../algorithms/multi-mod-lifting-container.h  | 12 ++++---
 linbox/solutions/solve/solve-dixon-rns.h      | 34 +++++++++++++------
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 4b348cf1b..fec26fa58 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -123,9 +123,10 @@ namespace LinBox {
 
             // Compute how many iterations are needed
             auto hb = RationalSolveHadamardBound(A, b);
-            double pLog = Givaro::logtwo(_p);
+            double log2P = Givaro::logtwo(_p);
             // _iterationsCount = log2(2 * N * D) / log2(p)
-            _iterationsCount = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog);
+            _log2Bound = 1.0 + hb.numLogBound + hb.denLogBound;
+            _iterationsCount = std::ceil(_log2Bound / log2P);
             std::cout << "k: " << _iterationsCount << std::endl;
 
             // @fixme Fact is RationalReconstruction which needs numbound and denbound
@@ -198,9 +199,11 @@ namespace LinBox {
         // ----- NOT LiftingContainer API
         // ----- but still needed
 
-        const IElement numbound() const { return _numbound; }
+        const IElement& numbound() const { return _numbound; }
 
-        const IElement denbound() const { return _denbound; }
+        const IElement& denbound() const { return _denbound; }
+
+        double log2Bound() const { return _log2Bound; }
 
         uint32_t primesCount() const { return _primesCount; }
 
@@ -303,6 +306,7 @@ namespace LinBox {
 
         IElement _numbound;
         IElement _denbound;
+        double _log2Bound;
 
         IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index da1719f13..af192b232 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -51,7 +51,8 @@ namespace LinBox {
             // Stores each c0 + c1 pj + ... + ck pj^k for each pj
             std::vector<IVector> padicAccumulations(_lc.primesCount(), _lc.ring());
             // Temporary structure to store a ci for each pj
-            std::vector<IVector> digits(_lc.primesCount(), _lc.ring()); // @fixme Could be a Field Element?
+            std::vector<IVector> digits(_lc.primesCount(),
+                                        _lc.ring()); // @fixme Could be a Field Element?
             // The pj^i for each pj
             std::vector<IElement> radices(_lc.primesCount(), 1);
 
@@ -68,15 +69,28 @@ namespace LinBox {
                     std::cout << "STEP " << i << " DIGITS " << digits[j] << std::endl;
                     IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
                     _lc.ring().mulin(radices[j], _lc.prime(j));
-                    std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j] << std::endl;
+                    std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j]
+                              << std::endl;
                 }
             }
 
-            // @fixme From here padicAccumulations are all right, we should CRT reconstruct that
-            using CRAField = Modular<Integer>;
-            ChineseRemainder<CRABuilderFullMultip<CRAField>> cra();
+            // CRT reconstruction from paddicAccumulations
+            using CRAField = Givaro::Modular<Integer>;
+            RationalCRABuilderFullMultip<CRAField> craBuilder(
+                _lc.log2Bound() * 1.4427); // 1.4427 = 1 / log(2)
 
-            // @fixme Rat Recon
+            {
+                CRAField field(radices[0]);
+                craBuilder.initialize(field, padicAccumulations[0]);
+            }
+
+            for (auto j = 1u; j < _lc.primesCount(); ++j) {
+                CRAField field(radices[j]);
+                craBuilder.progress(field, padicAccumulations[j]);
+            }
+
+            // Rational reconstruction
+            craBuilder.result(xNum, xDen);
 
             return true;
         }
@@ -100,8 +114,8 @@ namespace LinBox {
          * Dense solving.
          */
         template <class RVector, class Vector>
-        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
-                   const Method::DixonRNS& m)
+        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
+                   const Vector& b, const Method::DixonRNS& m)
         {
             // @fixme We should use some code from DixonSolver...
             // But that's hard so we just assume that A is square and invertible.
@@ -125,8 +139,8 @@ namespace LinBox {
      * \brief Solve specialisation for DixonRNS on dense matrices.
      */
     template <class RVector, class Ring, class Vector>
-    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
-               const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
+    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
+               const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
     {
         commentator().start("solve.dixon.integer.dense");
 

From bfa08ed9342f9dba366edf064699080ef02786e6 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 4 Jun 2019 10:51:36 +0200
Subject: [PATCH 18/63] Detecting wrong primes using nullity

---
 .../algorithms/multi-mod-lifting-container.h  | 30 +++++--------------
 .../matrix/matrixdomain/blas-matrix-domain.h  | 21 ++++++-------
 linbox/solutions/solve/solve-dixon-rns.h      | 11 ++-----
 3 files changed, 21 insertions(+), 41 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index fec26fa58..75fd12f9e 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -143,19 +143,15 @@ namespace LinBox {
                 _B.reserve(_primesCount);
 
                 for (const auto& F : _fields) {
+                    _B.emplace_back(A, F); // Rebind into the field
+
+                    int nullity = 0;
                     BlasMatrixDomain<Field> bmd(F);
-                    _B.emplace_back(F, _n, _n);
-                    auto& Bpi = _B.back();
-
-                    // @fixme Taken for rational-solver.inl. BETTER USE REBIND!!!
-                    for (size_t i = 0; i < _n; ++i) {
-                        for (size_t j = 0; j < _n; ++j) {
-                            F.init(Bpi.refEntry(i, j), A.getEntry(i, j));
-                        }
+                    bmd.invin(_B.back(), nullity);
+                    if (nullity > 0) {
+                        // @fixme Should redraw another prime!
+                        throw LinBoxError("Wrong prime, sorry.");
                     }
-
-                    // @fixme @cpernet Use FFLAS directly, so that we can have a REAL in place inv.
-                    bmd.invin(Bpi);
                 }
             }
 
@@ -218,8 +214,6 @@ namespace LinBox {
          */
         bool next(std::vector<IVector>& digits)
         {
-            std::cout << "----- NEXT" << std::endl;
-
             VectorDomain<Ring> IVD(_ring);
 
             // @fixme Should be done in parallel!
@@ -229,8 +223,6 @@ namespace LinBox {
                 auto& Q = _Q[j];
                 auto& R = _R[j];
 
-                std::cout << "--- FOR " << Integer(pj) << std::endl;
-
                 // @todo @cpernet Is there a VectorDomain::divmod somewhere?
                 // Euclidian division so that rj = pj Qj + Rj
                 for (auto i = 0u; i < _n; ++i) {
@@ -239,10 +231,6 @@ namespace LinBox {
                     _ring.quoRem(Q[i], R[i], r[i], pj);
                 }
 
-                std::cout << "r: " << r << std::endl;
-                std::cout << "Q: " << Q << std::endl;
-                std::cout << "R: " << R << std::endl;
-
                 // Convert R to the field
                 // @fixme @cpernet Could this step be ignored?
                 // If not, put that in already allocated memory, and not use a temporary here.
@@ -253,16 +241,12 @@ namespace LinBox {
                 auto& Fc = _Fc[j];
                 B.apply(Fc, FR);
 
-                std::cout << "Fc: " << Fc << std::endl;
-
                 // @todo Convert _c[i] to RNS
                 digits[j] = IVector(_ring, Fc);
             }
 
             // ----- Compute the next residue!
 
-            std::cout << "--- Residue update" << std::endl;
-
             // @note This is a dummy implementation, for now.
 
             // r <= (r - A c) / p
diff --git a/linbox/matrix/matrixdomain/blas-matrix-domain.h b/linbox/matrix/matrixdomain/blas-matrix-domain.h
index 224ede0d6..c9fbe796c 100644
--- a/linbox/matrix/matrixdomain/blas-matrix-domain.h
+++ b/linbox/matrix/matrixdomain/blas-matrix-domain.h
@@ -631,15 +631,6 @@ namespace LinBox
 			return B.swap(A);
 		}
 
-
-		//- Inversion w singular check
-		// template <class Matrix>
-		// Matrix& inv( Matrix &Ainv, const Matrix &A, int& nullity) const
-		// {
-			// nullity = BlasMatrixDomainInv<Field,Matrix,Matrix>()(field(),Ainv,A);
-			// return Ainv;
-		// }
-
 		//! Inversion w singular check
 		template <class Matrix1, class Matrix2>
 		Matrix1& inv( Matrix1 &Ainv, const Matrix2 &A, int& nullity) const
@@ -648,7 +639,6 @@ namespace LinBox
 			return Ainv;
 		}
 
-
 		//! Inversion (the matrix A is modified) w singular check
 		template <class Matrix1, class Matrix2>
 		Matrix1& invin( Matrix1 &Ainv, Matrix2 &A, int& nullity) const
@@ -657,6 +647,17 @@ namespace LinBox
 			return Ainv;
 		}
 
+		//! In-place inversion w singular check: A receives the result, nullity is set from BlasMatrixDomainInv
+		template <class Matrix>
+		Matrix& invin(Matrix& A, int& nullity) const
+		{
+			// @fixme @cpernet Apparently FFLAS has a new method that does
+			// inversion really in place, we should update this code.
+			Matrix tmp(A); // Copy of A used as the input, so that A itself can receive the result
+			nullity = BlasMatrixDomainInv<Field,Matrix,Matrix>()(field(),A,tmp);
+			return A;
+		}
+
 		//! Rank
 		template <class Matrix>
 		unsigned int rank(const Matrix &A) const
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index af192b232..4cd79529c 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -66,18 +66,15 @@ namespace LinBox {
 
                 // @fixme Better use PolEval (except memory explosion?)
                 for (auto j = 0u; j < _lc.primesCount(); ++j) {
-                    std::cout << "STEP " << i << " DIGITS " << digits[j] << std::endl;
                     IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
                     _lc.ring().mulin(radices[j], _lc.prime(j));
-                    std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j]
-                              << std::endl;
                 }
             }
 
             // CRT reconstruction from paddicAccumulations
             using CRAField = Givaro::Modular<Integer>;
-            RationalCRABuilderFullMultip<CRAField> craBuilder(
-                _lc.log2Bound() * 1.4427); // 1.4427 = 1 / log(2)
+            RationalCRABuilderFullMultip<CRAField> craBuilder(_lc.log2Bound()
+                                                              / 1.4427); // 1.4427 = 1 / log(2)
 
             {
                 CRAField field(radices[0]);
@@ -149,9 +146,7 @@ namespace LinBox {
         // implicitly requiring 0-{p-1} representation of the p-adic sequence elements.
         using Field = Givaro::Modular<double>;
         using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
-        // PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
-        // @fixme This is for debug!
-        PrimeGenerator primeGenerator(3);
+        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
 
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);

From 5c5aea72dd3e4b85275aa98a3d3227fe145b1d5f Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 5 Jun 2019 16:05:13 +0200
Subject: [PATCH 19/63] Getting A into an RNS system

---
 .../algorithms/multi-mod-lifting-container.h  | 80 ++++++++++++-------
 linbox/solutions/hadamard-bound.h             |  7 +-
 2 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 75fd12f9e..00d4a1a9b 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -99,40 +99,62 @@ namespace LinBox {
             std::cout << "l: " << _primesCount << std::endl;
 
             // Generating primes
-            IElement iTmp;
-            _ring.assign(_p, _ring.one);
-            for (auto j = 0u; j < _primesCount; ++j) {
-                auto pj = *primeGenerator;
-                ++primeGenerator;
+            {
+                IElement iTmp;
+                _ring.assign(_p, _ring.one);
+                for (auto j = 0u; j < _primesCount; ++j) {
+                    auto pj = *primeGenerator;
+                    ++primeGenerator;
+
+                    // Ensure that all primes are different
+                    if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) {
+                        j -= 1;
+                        continue;
+                    }
 
-                // Ensure that all primes are different
-                if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) {
-                    j -= 1;
-                    continue;
-                }
+                    _primes.emplace_back(pj);
+                    _fields.emplace_back(pj);
+                    _ring.init(iTmp, pj);
+                    _ring.mulin(_p, iTmp);
 
-                _primes.emplace_back(pj);
-                _fields.emplace_back(pj);
-                _ring.init(iTmp, pj);
-                _ring.mulin(_p, iTmp);
+                    std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl;
+                }
 
-                std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl;
+                std::cout << "p: " << _p << std::endl;
             }
 
-            std::cout << "p: " << _p << std::endl;
-
             // Compute how many iterations are needed
-            auto hb = RationalSolveHadamardBound(A, b);
-            double log2P = Givaro::logtwo(_p);
-            // _iterationsCount = log2(2 * N * D) / log2(p)
-            _log2Bound = 1.0 + hb.numLogBound + hb.denLogBound;
-            _iterationsCount = std::ceil(_log2Bound / log2P);
-            std::cout << "k: " << _iterationsCount << std::endl;
-
-            // @fixme Fact is RationalReconstruction which needs numbound and denbound
-            // expects them to be in non-log...
-            _ring.init(_numbound, Integer(1) << static_cast<uint64_t>(std::ceil(hb.numLogBound)));
-            _ring.init(_denbound, Integer(1) << static_cast<uint64_t>(std::ceil(hb.denLogBound)));
+            {
+                auto hb = RationalSolveHadamardBound(A, b);
+                double log2P = Givaro::logtwo(_p);
+                // _iterationsCount = log2(2 * N * D) / log2(p)
+                _log2Bound = hb.solutionLogBound;
+                _iterationsCount = std::ceil(_log2Bound / log2P);
+                std::cout << "k: " << _iterationsCount << std::endl;
+
+                // @fixme Fact is RationalReconstruction which needs numbound and denbound
+                // expects them to be in non-log... @fixme Still needed?
+                _ring.init(_numbound, Integer(1)
+                                          << static_cast<uint64_t>(std::ceil(hb.numLogBound)));
+                _ring.init(_denbound, Integer(1)
+                                          << static_cast<uint64_t>(std::ceil(hb.denLogBound)));
+            }
+
+            // Making A into a RNS domain
+            {
+                // @fixme Really provide the primes, with the correct bound
+                FFPACK::rns_double rnsSystem(std::vector<double>({59059367, 57648973}));
+                FFPACK::RNSInteger<FFPACK::rns_double> rnsDomain(rnsSystem);
+                auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim());
+
+                Integer max;
+                InfinityNorm(max, A);
+                double logMax = Givaro::logtwo(max) / 16.; // @note So that 2^(16*k) is the max.
+                FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), logMax, A.getPointer(), A.stride(),
+                                 rnsA);
+
+                std::cout << "rnsA: " << rnsA[0]._ptr[0] << " " << rnsA[0]._ptr[1] << std::endl;
+            }
 
             // Initialize all inverses
             // @note An inverse mod some p within DixonSolver<Dense> was already computed,
@@ -155,7 +177,7 @@ namespace LinBox {
                 }
             }
 
-            //----- Iteration
+            //----- Locals setup
 
             _r.reserve(_primesCount);
             _Q.reserve(_primesCount);
diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h
index 8216994e2..48b9bcf55 100644
--- a/linbox/solutions/hadamard-bound.h
+++ b/linbox/solutions/hadamard-bound.h
@@ -282,7 +282,11 @@ namespace LinBox {
 
     // ----- Fast Hadamard bound
 
-
+    template <class IMatrix>
+    inline Integer& InfinityNorm(Integer& max, const IMatrix& A) {
+        typename MatrixTraits<IMatrix>::MatrixCategory tag; // Forwards to the tagged InfinityNorm overload for IMatrix
+        return InfinityNorm(max, A, tag);
+    }
 
     /**
      * Returns the maximal absolute value.
@@ -294,7 +298,6 @@ namespace LinBox {
         return InfinityNorm(max, ACopy, MatrixCategories::RowColMatrixTag());
     }
 
-
     template <class IMatrix>
     inline Integer& InfinityNorm(Integer& max, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag)
     {

From 82433463881b7e906977285fe17d7b148ad80cec Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Thu, 6 Jun 2019 15:15:18 +0200
Subject: [PATCH 20/63] Creating the RNS basis, sorting primes

---
 .../algorithms/multi-mod-lifting-container.h  | 120 ++++++++++++------
 linbox/solutions/solve/solve-dixon-rns.h      |   3 -
 tests/test-solve-full.C                       |   8 +-
 3 files changed, 82 insertions(+), 49 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 00d4a1a9b..c996d6c17 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -94,66 +94,72 @@ namespace LinBox {
             A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
             std::cout << "b: " << b << std::endl;
 
+            // This will contain the primes of our MultiMod basis
             // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?)
             _primesCount = 2;
+            _primes.resize(_primesCount);
             std::cout << "l: " << _primesCount << std::endl;
 
-            // Generating primes
+            // Some preparation work
+            Integer infinityNormA;
+            InfinityNorm(infinityNormA, A);
+            double logInfinityNormA = Givaro::logtwo(infinityNormA);
+
             {
-                IElement iTmp;
-                _ring.assign(_p, _ring.one);
-                for (auto j = 0u; j < _primesCount; ++j) {
-                    auto pj = *primeGenerator;
+                // Based on Chen-Storjohann's paper, this is the bit size
+                // of the needed RNS basis for the residue computation
+                double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)) * 16; // @fixme @cpernet Does this factor 16 makes sense?
+                uint32_t rnsBasisPrimesCount =
+                    std::ceil(rnsBasisBitSize / primeGenerator.getBits());
+                _rnsPrimes.resize(rnsBasisPrimesCount);
+                std::cout << "RNS basis: " << rnsBasisPrimesCount << " estimated primes." << std::endl;
+
+                std::vector<double> primes;
+                for (auto j = 0u; j < _primesCount + rnsBasisPrimesCount; ++j) {
+                    auto p = *primeGenerator;
                     ++primeGenerator;
 
-                    // Ensure that all primes are different
-                    if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) {
-                        j -= 1;
+                    auto lb = std::lower_bound(primes.begin(), primes.end(), p);
+                    if (lb != primes.end() && *lb == p) {
+                        --j;
                         continue;
                     }
 
-                    _primes.emplace_back(pj);
-                    _fields.emplace_back(pj);
-                    _ring.init(iTmp, pj);
-                    _ring.mulin(_p, iTmp);
-
-                    std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl;
+                    // Inserting the primes at the right place to keep the array sorted
+                    primes.insert(lb, p);
                 }
 
-                std::cout << "p: " << _p << std::endl;
-            }
+                // We take the smallest primes for our MultiMod basis
+                std::copy(primes.begin(), primes.begin() + _primesCount, _primes.begin());
 
-            // Compute how many iterations are needed
-            {
-                auto hb = RationalSolveHadamardBound(A, b);
-                double log2P = Givaro::logtwo(_p);
-                // _iterationsCount = log2(2 * N * D) / log2(p)
-                _log2Bound = hb.solutionLogBound;
-                _iterationsCount = std::ceil(_log2Bound / log2P);
-                std::cout << "k: " << _iterationsCount << std::endl;
+                // And the others for our RNS basis
+                std::copy(primes.begin() + _primesCount, primes.end(), _rnsPrimes.begin());
 
-                // @fixme Fact is RationalReconstruction which needs numbound and denbound
-                // expects them to be in non-log... @fixme Still needed?
-                _ring.init(_numbound, Integer(1)
-                                          << static_cast<uint64_t>(std::ceil(hb.numLogBound)));
-                _ring.init(_denbound, Integer(1)
-                                          << static_cast<uint64_t>(std::ceil(hb.denLogBound)));
+                // We check that we really need all the primes within the RNS basis,
+                // as the first count was just an upper estimation.
+                double bitSize = 0.0;
+                for (int i = _rnsPrimes.size() - 1; i >= 0; --i) {
+                    bitSize += Givaro::logtwo(primes[i]);
+
+                    if (bitSize > rnsBasisBitSize && i > 0) {
+                        _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (i - 1));
+                        std::cout << "RNS basis: Erasing extra " << i << "primes." << std::endl;
+                        break;
+                    }
+                }
             }
 
-            // Making A into a RNS domain
+            // Generating primes
             {
-                // @fixme Really provide the primes, with the correct bound
-                FFPACK::rns_double rnsSystem(std::vector<double>({59059367, 57648973}));
-                FFPACK::RNSInteger<FFPACK::rns_double> rnsDomain(rnsSystem);
-                auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim());
-
-                Integer max;
-                InfinityNorm(max, A);
-                double logMax = Givaro::logtwo(max) / 16.; // @note So that 2^(16*k) is the max.
-                FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), logMax, A.getPointer(), A.stride(),
-                                 rnsA);
+                IElement iTmp;
+                _ring.assign(_p, _ring.one);
+                for (auto& pj : _primes) {
+                    _fields.emplace_back(pj);
+                    _ring.init(iTmp, pj);
+                    _ring.mulin(_p, iTmp);
+                }
 
-                std::cout << "rnsA: " << rnsA[0]._ptr[0] << " " << rnsA[0]._ptr[1] << std::endl;
+                std::cout << "p: " << _p << std::endl;
             }
 
             // Initialize all inverses
@@ -177,6 +183,35 @@ namespace LinBox {
                 }
             }
 
+            // Making A into the RNS domain
+            {
+                FFPACK::rns_double rnsSystem(_rnsPrimes);
+                FFPACK::RNSInteger<FFPACK::rns_double> rnsDomain(rnsSystem);
+                auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim());
+
+                double cmax =
+                    logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A.
+                FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), cmax, A.getPointer(),
+                                 A.stride(), rnsA);
+            }
+
+            // Compute how many iterations are needed
+            {
+                auto hb = RationalSolveHadamardBound(A, b);
+                double log2P = Givaro::logtwo(_p);
+                // _iterationsCount = log2(2 * N * D) / log2(p)
+                _log2Bound = hb.solutionLogBound;
+                _iterationsCount = std::ceil(_log2Bound / log2P);
+                std::cout << "k: " << _iterationsCount << std::endl;
+
+                // @fixme Fact is RationalReconstruction which needs numbound and denbound
+                // expects them to be in non-log... @fixme Still needed?
+                _ring.init(_numbound, Integer(1)
+                                          << static_cast<uint64_t>(std::ceil(hb.numLogBound)));
+                _ring.init(_denbound, Integer(1)
+                                          << static_cast<uint64_t>(std::ceil(hb.denLogBound)));
+            }
+
             //----- Locals setup
 
             _r.reserve(_primesCount);
@@ -316,6 +351,7 @@ namespace LinBox {
 
         IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
+        std::vector<double> _rnsPrimes;
         size_t
             _iterationsCount; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
         size_t _n;            // Row/column dimension of A.
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 4cd79529c..630ac235d 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -151,9 +151,6 @@ namespace LinBox {
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);
 
-        std::cout << "FOUND xNum: " << xNum << std::endl;
-        std::cout << "FOUND xDen: " << xDen << std::endl;
-
         commentator().stop("solve.dixon.integer.dense");
 
         // @fixme Implement something like that
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index 54ea8c083..5b9e5acec 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -141,10 +141,10 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R
         solve(x, A, b, method);
         ok = check_result<SolveMethod>(x, A, b, RA, Rb);
 
-        if (ok) {
-            solveInPlace(x, A, b, method);
-            ok = check_result<SolveMethod>(x, A, b, RA, Rb);
-        }
+        // if (ok) {
+        //     solveInPlace(x, A, b, method);
+        //     ok = check_result<SolveMethod>(x, A, b, RA, Rb);
+        // }
     } catch (...) {
         print_error<SolveMethod>(x, A, b, "throws error");
         return false;

From c682b211d871b258d6c68725ea18433af0c17ab1 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 11 Jun 2019 17:52:39 +0200
Subject: [PATCH 21/63] Failed to understand how to write directly to an
 rns_element_ptr

---
 .../algorithms/multi-mod-lifting-container.h  | 75 ++++++++++++++-----
 1 file changed, 56 insertions(+), 19 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index c996d6c17..b3a9863c8 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -70,6 +70,11 @@ namespace LinBox {
         using Field = _Field;
         using PrimeGenerator = _PrimeGenerator;
 
+        using RNSSystem = FFPACK::rns_double;
+        using RNSDomain = FFPACK::RNSInteger<FFPACK::rns_double>;
+        using RNSElement = typename RNSDomain::Element;
+        using RNSElementPtr = typename RNSDomain::Element_ptr;
+
         using IElement = typename Ring::Element;
         using IMatrix = DenseMatrix<_Ring>;
         using IVector = DenseVector<_Ring>;
@@ -108,14 +113,15 @@ namespace LinBox {
             {
                 // Based on Chen-Storjohann's paper, this is the bit size
                 // of the needed RNS basis for the residue computation
-                double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)) * 16; // @fixme @cpernet Does this factor 16 makes sense?
-                uint32_t rnsBasisPrimesCount =
-                    std::ceil(rnsBasisBitSize / primeGenerator.getBits());
-                _rnsPrimes.resize(rnsBasisPrimesCount);
-                std::cout << "RNS basis: " << rnsBasisPrimesCount << " estimated primes." << std::endl;
+                double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n))
+                                         * 16; // @fixme @cpernet Does this factor 16 makes sense?
+                _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits());
+                _rnsPrimes.resize(_rnsBasisPrimesCount);
+                std::cout << "RNS basis: " << _rnsBasisPrimesCount << " estimated primes."
+                          << std::endl;
 
                 std::vector<double> primes;
-                for (auto j = 0u; j < _primesCount + rnsBasisPrimesCount; ++j) {
+                for (auto j = 0u; j < _primesCount + _rnsBasisPrimesCount; ++j) {
                     auto p = *primeGenerator;
                     ++primeGenerator;
 
@@ -185,14 +191,17 @@ namespace LinBox {
 
             // Making A into the RNS domain
             {
-                FFPACK::rns_double rnsSystem(_rnsPrimes);
-                FFPACK::RNSInteger<FFPACK::rns_double> rnsDomain(rnsSystem);
-                auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim());
+                RNSSystem rnsSystem(_rnsPrimes);
+                _rnsDomain = new RNSDomain(rnsSystem);
+                _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n);
+
+                // @fixme @cpernet Should it be transposed for better memory access between threads?
+                // Each column is the current digit c[j] mod pj
+                _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
 
                 double cmax =
                     logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A.
-                FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), cmax, A.getPointer(),
-                                 A.stride(), rnsA);
+                FFLAS::finit_rns(*_rnsDomain, _n, _n, cmax, A.getPointer(), A.stride(), _rnsA);
             }
 
             // Compute how many iterations are needed
@@ -231,6 +240,12 @@ namespace LinBox {
             }
         }
 
+        ~MultiModLiftingContainer() // Releases _rnsA and _rnsDomain
+        {
+            FFLAS::fflas_delete(_rnsA); // @fixme Does it know the size?
+            delete _rnsDomain; // NOTE(review): _rnsc is also allocated with fflas_new but is not released here - confirm
+        }
+
         // --------------------------
         // ----- LiftingContainer API
 
@@ -298,22 +313,38 @@ namespace LinBox {
                 auto& Fc = _Fc[j];
                 B.apply(Fc, FR);
 
-                // @todo Convert _c[i] to RNS
                 digits[j] = IVector(_ring, Fc);
+
+                // Store the very same result in the RNS system: since all the primes
+                // of the RNS system are bigger than the modulus used to compute _Fc,
+                // we just copy the result for every prime of the RNS basis.
+                std::cout << "FOR " << pj << std::endl;
+                for (auto i = 0u; i < _n; ++i) {
+                    // std::cout << _rnsc[i * _n + j]._ptr << std::endl;
+                    double cij = _Fc[j][i];
+                    std::cout << "stride " << _rnsc[i * _n + j]._stride << std::endl;
+                    auto stride = _rnsc[i * _n + j]._stride;
+                    for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) {
+                        _rnsc[i * _n + j]._ptr[h + stride] = cij;
+                    }
+                    _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]);
+                    std::cout << std::endl;
+                }
             }
 
             // ----- Compute the next residue!
 
-            // @note This is a dummy implementation, for now.
-
             // r <= (r - A c) / p
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
-                auto& r = _r[j]; // @fixme THEY HOLD ALL THE VERY SAME VALUE!
+                auto& r = _r[j];
                 auto& Q = _Q[j];
                 auto& R = _R[j];
 
                 auto& Fc = _Fc[j];
+
+                // @note We know that _Fc  @fixme @todo XXXX
+
                 // @fixme For now, we convert cj to integer,
                 // but it should be converted into a RNS system, on pre-allocated memory.
                 IVector Ic(_ring, Fc);
@@ -349,13 +380,19 @@ namespace LinBox {
         IElement _denbound;
         double _log2Bound;
 
+        RNSDomain* _rnsDomain = nullptr;
+        RNSElementPtr _rnsA; // The matrix A, but in the RNS system
+        // A matrix of digits c[j], being the current digits mod pj, in the RNS system
+        RNSElementPtr _rnsc;
+        size_t _rnsBasisPrimesCount = 0u;
+
         IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
         std::vector<double> _rnsPrimes;
-        size_t
-            _iterationsCount; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
-        size_t _n;            // Row/column dimension of A.
-        size_t _primesCount;  // How many primes. Equal to _primes.size().
+        // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
+        size_t _iterationsCount = 0u;
+        size_t _n = 0u;           // Row/column dimension of A.
+        size_t _primesCount = 0u; // How many primes. Equal to _primes.size().
 
         std::vector<FMatrix> _B;    // Inverses of A mod p[i]
         std::vector<Field> _fields; // All fields Modular<p[i]>

From 2f49d175aaccff8c63f5cbf0855839e1a5a9dea2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Pernet?= <clement.pernet@gmail.com>
Date: Wed, 12 Jun 2019 10:23:53 +0200
Subject: [PATCH 22/63] fix *16 hacks and *stride

---
 linbox/algorithms/multi-mod-lifting-container.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index b3a9863c8..13ef04c25 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -113,8 +113,7 @@ namespace LinBox {
             {
                 // Based on Chen-Storjohann's paper, this is the bit size
                 // of the needed RNS basis for the residue computation
-                double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n))
-                                         * 16; // @fixme @cpernet Does this factor 16 makes sense?
+                double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n));
                 _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits());
                 _rnsPrimes.resize(_rnsBasisPrimesCount);
                 std::cout << "RNS basis: " << _rnsBasisPrimesCount << " estimated primes."
@@ -201,7 +200,7 @@ namespace LinBox {
 
                 double cmax =
                     logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A.
-                FFLAS::finit_rns(*_rnsDomain, _n, _n, cmax, A.getPointer(), A.stride(), _rnsA);
+                FFLAS::finit_rns(*_rnsDomain, _n, _n, std::ceil(cmax), A.getPointer(), A.stride(), _rnsA);
             }
 
             // Compute how many iterations are needed
@@ -325,7 +324,7 @@ namespace LinBox {
                     std::cout << "stride " << _rnsc[i * _n + j]._stride << std::endl;
                     auto stride = _rnsc[i * _n + j]._stride;
                     for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) {
-                        _rnsc[i * _n + j]._ptr[h + stride] = cij;
+                        _rnsc[i * _n + j]._ptr[h * stride] = cij;
                     }
                     _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]);
                     std::cout << std::endl;

From b01f4875a9c0b89738ea232f6f2b1d042b69a49e Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 12 Jun 2019 14:35:05 +0200
Subject: [PATCH 23/63] Better names

---
 .../algorithms/multi-mod-lifting-container.h  | 32 +++++++++----------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 13ef04c25..885a8d429 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -103,7 +103,7 @@ namespace LinBox {
             // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?)
             _primesCount = 2;
             _primes.resize(_primesCount);
-            std::cout << "l: " << _primesCount << std::endl;
+            std::cout << "primesCount: " << _primesCount << std::endl;
 
             // Some preparation work
             Integer infinityNormA;
@@ -116,14 +116,15 @@ namespace LinBox {
                 double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n));
                 _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits());
                 _rnsPrimes.resize(_rnsBasisPrimesCount);
-                std::cout << "RNS basis: " << _rnsBasisPrimesCount << " estimated primes."
-                          << std::endl;
+                std::cout << "rnsBasisPrimesCount: " << _rnsBasisPrimesCount << std::endl;
 
                 std::vector<double> primes;
                 for (auto j = 0u; j < _primesCount + _rnsBasisPrimesCount; ++j) {
                     auto p = *primeGenerator;
                     ++primeGenerator;
 
+                    // @note std::lower_bound finds the iterator where to put p in the sorted container.
+                    // The name of the routine might be strange, but, hey, that's not my fault.
                     auto lb = std::lower_bound(primes.begin(), primes.end(), p);
                     if (lb != primes.end() && *lb == p) {
                         --j;
@@ -157,14 +158,14 @@ namespace LinBox {
             // Generating primes
             {
                 IElement iTmp;
-                _ring.assign(_p, _ring.one);
+                _ring.assign(_primesProduct, _ring.one);
                 for (auto& pj : _primes) {
                     _fields.emplace_back(pj);
                     _ring.init(iTmp, pj);
-                    _ring.mulin(_p, iTmp);
+                    _ring.mulin(_primesProduct, iTmp);
                 }
 
-                std::cout << "p: " << _p << std::endl;
+                std::cout << "primesProduct: " << _primesProduct << std::endl;
             }
 
             // Initialize all inverses
@@ -206,11 +207,11 @@ namespace LinBox {
             // Compute how many iterations are needed
             {
                 auto hb = RationalSolveHadamardBound(A, b);
-                double log2P = Givaro::logtwo(_p);
+                double log2P = Givaro::logtwo(_primesProduct);
                 // _iterationsCount = log2(2 * N * D) / log2(p)
                 _log2Bound = hb.solutionLogBound;
                 _iterationsCount = std::ceil(_log2Bound / log2P);
-                std::cout << "k: " << _iterationsCount << std::endl;
+                std::cout << "iterationsCount: " << _iterationsCount << std::endl;
 
                 // @fixme Fact is RationalReconstruction which needs numbound and denbound
                 // expects them to be in non-log... @fixme Still needed?
@@ -260,7 +261,7 @@ namespace LinBox {
          * We are compliant to the interface even though
          * p is multi-modular and thus not a prime per se.
          */
-        const IElement& prime() const final { return _p; }
+        const IElement& prime() const final { return _primesProduct; }
 
         // ------------------------------
         // ----- NOT LiftingContainer API
@@ -294,7 +295,7 @@ namespace LinBox {
                 auto& Q = _Q[j];
                 auto& R = _R[j];
 
-                // @todo @cpernet Is there a VectorDomain::divmod somewhere?
+                // @note There is no VectorDomain::divmod yet.
                 // Euclidian division so that rj = pj Qj + Rj
                 for (auto i = 0u; i < _n; ++i) {
                     // @fixme @cpernet Is this OK for any Ring or should we be sure we are using
@@ -312,22 +313,19 @@ namespace LinBox {
                 auto& Fc = _Fc[j];
                 B.apply(Fc, FR);
 
+                // @fixme We might not need to store digits into IVectors, and returning _Fc
+                // would do the trick
                 digits[j] = IVector(_ring, Fc);
 
                 // Store the very same result in an RNS system,
                 // but fact is all the primes of the RNS system are bigger
                 // than the modulus used to compute _Fc, we just copy the result for everybody.
-                std::cout << "FOR " << pj << std::endl;
                 for (auto i = 0u; i < _n; ++i) {
-                    // std::cout << _rnsc[i * _n + j]._ptr << std::endl;
-                    double cij = _Fc[j][i];
-                    std::cout << "stride " << _rnsc[i * _n + j]._stride << std::endl;
+                    double cij = Fc[i];
                     auto stride = _rnsc[i * _n + j]._stride;
                     for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) {
                         _rnsc[i * _n + j]._ptr[h * stride] = cij;
                     }
-                    _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]);
-                    std::cout << std::endl;
                 }
             }
 
@@ -385,7 +383,7 @@ namespace LinBox {
         RNSElementPtr _rnsc;
         size_t _rnsBasisPrimesCount = 0u;
 
-        IElement _p;                   // The global modulus for lifting: a multiple of all _primes.
+        IElement _primesProduct;                   // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
         std::vector<double> _rnsPrimes;
         // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).

From 8a0343ea5e4d6073acf3ba76a42940a697979794 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 12 Jun 2019 14:54:19 +0200
Subject: [PATCH 24/63] Fixed segfaulting because of RNSSystem not being copied

---
 .../algorithms/multi-mod-lifting-container.h  | 42 +++++++++++++------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 885a8d429..5197d10aa 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -123,8 +123,9 @@ namespace LinBox {
                     auto p = *primeGenerator;
                     ++primeGenerator;
 
-                    // @note std::lower_bound finds the iterator where to put p in the sorted container.
-                    // The name of the routine might be strange, but, hey, that's not my fault.
+                    // @note std::lower_bound finds the iterator where to put p in the sorted
+                    // container. The name of the routine might be strange, but, hey, that's not my
+                    // fault.
                     auto lb = std::lower_bound(primes.begin(), primes.end(), p);
                     if (lb != primes.end() && *lb == p) {
                         --j;
@@ -191,17 +192,16 @@ namespace LinBox {
 
             // Making A into the RNS domain
             {
-                RNSSystem rnsSystem(_rnsPrimes);
-                _rnsDomain = new RNSDomain(rnsSystem);
+                _rnsSystem = new RNSSystem(_rnsPrimes);
+                _rnsDomain = new RNSDomain(*_rnsSystem);
                 _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n);
-
-                // @fixme @cpernet Just it be transpose for better memory access between threads?
-                // Each column is the current digit c[j] mod pj
                 _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
+                _rnsAc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
 
-                double cmax =
-                    logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A.
-                FFLAS::finit_rns(*_rnsDomain, _n, _n, std::ceil(cmax), A.getPointer(), A.stride(), _rnsA);
+                // @note So that 2^(16*cmax) is the max element of A.
+                double cmax = logInfinityNormA / 16.;
+                FFLAS::finit_rns(*_rnsDomain, _n, _n, std::ceil(cmax), A.getPointer(), A.stride(),
+                                 _rnsA);
             }
 
             // Compute how many iterations are needed
@@ -244,6 +244,7 @@ namespace LinBox {
         {
             FFLAS::fflas_delete(_rnsA); // @fixme Does it knows the size?
             delete _rnsDomain;
+            delete _rnsSystem;
         }
 
         // --------------------------
@@ -331,6 +332,20 @@ namespace LinBox {
 
             // ----- Compute the next residue!
 
+            // @note The compute the next residu r <= (r - A c) / p
+            // By first doing A c as a fgemm within the RNS domain.
+            FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _n,
+                         _primesCount, _rnsDomain->one, _rnsA, _n, _rnsc, _n, _rnsDomain->zero,
+                         _rnsAc, _n);
+
+            std::cout << "---------" << std::endl;
+            for (auto i = 0u; i < _n; ++i) {
+                for (auto j = 0u; j < _primesCount; ++j) {
+                    _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j])
+                        << std::endl;
+                }
+            }
+
             // r <= (r - A c) / p
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
@@ -340,8 +355,6 @@ namespace LinBox {
 
                 auto& Fc = _Fc[j];
 
-                // @note We know that _Fc  @fixme @todo XXXX
-
                 // @fixme For now, we convert cj to integer,
                 // but it should be converted into a RNS system, on pre-allocated memory.
                 IVector Ic(_ring, Fc);
@@ -377,13 +390,16 @@ namespace LinBox {
         IElement _denbound;
         double _log2Bound;
 
+        RNSSystem* _rnsSystem = nullptr;
         RNSDomain* _rnsDomain = nullptr;
         RNSElementPtr _rnsA; // The matrix A, but in the RNS system
         // A matrix of digits c[j], being the current digits mod pj, in the RNS system
         RNSElementPtr _rnsc;
+        // The result matrix of the fgemm _rnsA * _rnsc.
+        RNSElementPtr _rnsAc;
         size_t _rnsBasisPrimesCount = 0u;
 
-        IElement _primesProduct;                   // The global modulus for lifting: a multiple of all _primes.
+        IElement _primesProduct;       // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
         std::vector<double> _rnsPrimes;
         // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).

From 9f50edf253a4aad6067f657bff48358364097533 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 12 Jun 2019 16:48:36 +0200
Subject: [PATCH 25/63] RNS-based dixon working only for matrix size = 2

---
 .../algorithms/multi-mod-lifting-container.h  | 146 ++++++++++++------
 1 file changed, 98 insertions(+), 48 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 5197d10aa..09497582a 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -114,12 +114,12 @@ namespace LinBox {
                 // Based on Chen-Storjohann's paper, this is the bit size
                 // of the needed RNS basis for the residue computation
                 double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n));
-                _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits());
-                _rnsPrimes.resize(_rnsBasisPrimesCount);
-                std::cout << "rnsBasisPrimesCount: " << _rnsBasisPrimesCount << std::endl;
+                _rnsPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits());
+                _rnsPrimes.resize(_rnsPrimesCount);
+                std::cout << "rnsBasisPrimesCount: " << _rnsPrimesCount << std::endl;
 
                 std::vector<double> primes;
-                for (auto j = 0u; j < _primesCount + _rnsBasisPrimesCount; ++j) {
+                for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) {
                     auto p = *primeGenerator;
                     ++primeGenerator;
 
@@ -145,18 +145,20 @@ namespace LinBox {
                 // We check that we really need all the primes within the RNS basis,
                 // as the first count was just an upper estimation.
                 double bitSize = 0.0;
-                for (int i = _rnsPrimes.size() - 1; i >= 0; --i) {
-                    bitSize += Givaro::logtwo(primes[i]);
+                for (int h = _rnsPrimes.size() - 1; h >= 0; --h) {
+                    bitSize += Givaro::logtwo(primes[h]);
 
-                    if (bitSize > rnsBasisBitSize && i > 0) {
-                        _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (i - 1));
-                        std::cout << "RNS basis: Erasing extra " << i << "primes." << std::endl;
+                    if (bitSize > rnsBasisBitSize && h > 0) {
+                        _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (h - 1));
+                        _rnsPrimesCount = _rnsPrimes.size(); // erase() removed h - 1 primes, not h.
+                        std::cout << "RNS basis: Erasing extra " << (h - 1) << " primes." << std::endl;
                         break;
                     }
                 }
             }
 
             // Generating primes
+            // @fixme Cleanup, might not be needed
             {
                 IElement iTmp;
                 _ring.assign(_primesProduct, _ring.one);
@@ -196,7 +198,7 @@ namespace LinBox {
                 _rnsDomain = new RNSDomain(*_rnsSystem);
                 _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n);
                 _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
-                _rnsAc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
+                _rnsR = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
 
                 // @note So that 2^(16*cmax) is the max element of A.
                 double cmax = logInfinityNormA / 16.;
@@ -204,6 +206,20 @@ namespace LinBox {
                                  _rnsA);
             }
 
+            // Compute the inverses of pj for each RNS prime
+            {
+                _primesRNSInverses.resize(_primesCount);
+                for (auto j = 0u; j < _primesCount; ++j) {
+                    auto prime = _primes[j];
+                    _primesRNSInverses[j].resize(_rnsPrimesCount);
+                    for (auto h = 0u; h < _rnsPrimesCount; ++h) {
+                        auto& rnsF = _rnsSystem->_field_rns[h];
+                        auto& primeInverse = _primesRNSInverses[j][h];
+                        rnsF.inv(primeInverse, prime);
+                    }
+                }
+            }
+
             // Compute how many iterations are needed
             {
                 auto hb = RationalSolveHadamardBound(A, b);
@@ -242,6 +258,8 @@ namespace LinBox {
 
         ~MultiModLiftingContainer()
         {
+            FFLAS::fflas_delete(_rnsR); // @fixme Does it knows the size?
+            FFLAS::fflas_delete(_rnsc); // @fixme Does it knows the size?
             FFLAS::fflas_delete(_rnsA); // @fixme Does it knows the size?
             delete _rnsDomain;
             delete _rnsSystem;
@@ -322,63 +340,94 @@ namespace LinBox {
                 // but fact is all the primes of the RNS system are bigger
                 // than the modulus used to compute _Fc, we just copy the result for everybody.
                 for (auto i = 0u; i < _n; ++i) {
-                    double cij = Fc[i];
-                    auto stride = _rnsc[i * _n + j]._stride;
-                    for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) {
-                        _rnsc[i * _n + j]._ptr[h * stride] = cij;
-                    }
+                    setRNSMatrixElementAllResidues(_rnsR, _n, i, j, FR[i]);
+                    setRNSMatrixElementAllResidues(_rnsc, _n, i, j, Fc[i]);
                 }
             }
 
-            // ----- Compute the next residue!
+            // ----- Compute the next residues!
 
-            // @note The compute the next residu r <= (r - A c) / p
-            // By first doing A c as a fgemm within the RNS domain.
-            FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _n,
-                         _primesCount, _rnsDomain->one, _rnsA, _n, _rnsc, _n, _rnsDomain->zero,
-                         _rnsAc, _n);
+            // r <= Q + (R - A c) / p
 
-            std::cout << "---------" << std::endl;
-            for (auto i = 0u; i < _n; ++i) {
-                for (auto j = 0u; j < _primesCount; ++j) {
-                    _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j])
-                        << std::endl;
+            std::cout << "A" << std::endl;
+            for (auto j = 0u; j < _n; ++j) {
+                for (auto i = 0u; i < _n; ++i) {
+                    logRNSMatrixElement(_rnsA, _n, i, j);
                 }
             }
 
-            // r <= (r - A c) / p
+            std::cout << "c" << std::endl;
             for (auto j = 0u; j < _primesCount; ++j) {
-                auto pj = _primes[j];
-                auto& r = _r[j];
-                auto& Q = _Q[j];
-                auto& R = _R[j];
+                for (auto i = 0u; i < _n; ++i) {
+                    logRNSMatrixElement(_rnsc, _n, i, j);
+                }
+            }
 
-                auto& Fc = _Fc[j];
+            // By first computing R <= R - A c as a fgemm within the RNS domain.
+            FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n,
+                         _rnsDomain->mOne, _rnsA, _n, _rnsc, _n, _rnsDomain->one,
+                         _rnsR, _n);
+
+            std::cout << "R = Ac" << std::endl;
+            for (auto j = 0u; j < _primesCount; ++j) {
+                for (auto i = 0u; i < _n; ++i) {
+                    logRNSMatrixElement(_rnsR, _n, i, j);
+                }
+            }
 
-                // @fixme For now, we convert cj to integer,
-                // but it should be converted into a RNS system, on pre-allocated memory.
-                IVector Ic(_ring, Fc);
+            // We divide each residues by the according pj, which is done by multiplying.
+            // @fixme Could be done in parallel!
+            for (auto j = 0u; j < _primesCount; ++j) {
+                for (auto i = 0u; i < _n; ++i) {
+                    auto& rnsElement = _rnsR[i * _n + j];
+                    auto stride = rnsElement._stride;
+                    for (auto h = 0u; h < _rnsPrimesCount; ++h) {
+                        auto& rnsF = _rnsSystem->_field_rns[h];
+                        rnsF.mulin(rnsElement._ptr[h * stride], _primesRNSInverses[j][h]);
+                    }
+                }
+            }
 
-                // @fixme Should become a matrix-matrix multiplication!
-                // @fixme Should be able to do a gemv
-                _A.apply(r, Ic); // r = A c
-                IVD.negin(r);    // r = - A c
-                IVD.addin(r, R); // r = R - A c
+            // @fixme Could be done in parallel!
+            for (auto j = 0u; j < _primesCount; ++j) {
+                auto& r = _r[j];
+                auto& Q = _Q[j];
 
-                // r = (R - A c) / p
-                IElement Ipj;
-                _ring.init(Ipj, pj);
+                // r <- (R - Ac) / p
+                // @fixme @cpernet Don't know how to do that with one fconvert_rns!
                 for (auto i = 0u; i < _n; ++i) {
-                    _ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain?
+                    FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _n + j));
                 }
 
-                IVD.addin(r, Q); // r = Q + (R - A c) / p
+                // r <- Q + (R - Ac) / p
+                IVD.addin(r, Q);
             }
 
             ++_position;
             return true;
         }
 
+    private:
+        // Writes `value` into every residue slot (_rnsPrimesCount of them, one per RNS prime)
+        // of the element A[i * lda + j]. The value is stored as-is, with no modular reduction.
+        void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j,
+                                            double value)
+        {
+            auto stride = A[i * lda + j]._stride;
+            for (auto h = 0u; h < _rnsPrimesCount; ++h) {
+                A[i * lda + j]._ptr[h * stride] = value;
+            }
+        }
+
+        void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j) // Prints one element on std::cout.
+        {
+            Integer reconstructedInteger; // Filled by the 1x1 fconvert_rns below from A[i * lda + j].
+            FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &reconstructedInteger, 1, A + (i * lda + j));
+            std::cout << i << " " << j << " ";
+            _rnsDomain->write(std::cout, A[i * lda + j]); // NOTE(review): presumably the raw residues - confirm.
+            std::cout << " -> " << reconstructedInteger << std::endl;
+        }
+
     private:
         const Ring& _ring;
 
@@ -395,9 +444,10 @@ namespace LinBox {
         RNSElementPtr _rnsA; // The matrix A, but in the RNS system
         // A matrix of digits c[j], being the current digits mod pj, in the RNS system
         RNSElementPtr _rnsc;
-        // The result matrix of the fgemm _rnsA * _rnsc.
-        RNSElementPtr _rnsAc;
-        size_t _rnsBasisPrimesCount = 0u;
+        RNSElementPtr _rnsR;
+        size_t _rnsPrimesCount = 0u;
+        // Stores the inverse of pj modulo the h-th RNS prime into _primesRNSInverses[j][h]
+        std::vector<std::vector<FElement>> _primesRNSInverses;
 
         IElement _primesProduct;       // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!

From db3b78f1aa3133cdee82062c4e844f6d3260c1cf Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Thu, 13 Jun 2019 14:55:05 +0200
Subject: [PATCH 26/63] Fixed DixonRNS solver for dimension != 2

---
 .../algorithms/multi-mod-lifting-container.h  | 40 +++++--------------
 1 file changed, 10 insertions(+), 30 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 09497582a..d350aec16 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -258,9 +258,9 @@ namespace LinBox {
 
         ~MultiModLiftingContainer()
         {
-            FFLAS::fflas_delete(_rnsR); // @fixme Does it knows the size?
-            FFLAS::fflas_delete(_rnsc); // @fixme Does it knows the size?
-            FFLAS::fflas_delete(_rnsA); // @fixme Does it knows the size?
+            FFLAS::fflas_delete(_rnsR);
+            FFLAS::fflas_delete(_rnsc);
+            FFLAS::fflas_delete(_rnsA);
             delete _rnsDomain;
             delete _rnsSystem;
         }
@@ -340,8 +340,8 @@ namespace LinBox {
                 // but fact is all the primes of the RNS system are bigger
                 // than the modulus used to compute _Fc, we just copy the result for everybody.
                 for (auto i = 0u; i < _n; ++i) {
-                    setRNSMatrixElementAllResidues(_rnsR, _n, i, j, FR[i]);
-                    setRNSMatrixElementAllResidues(_rnsc, _n, i, j, Fc[i]);
+                    setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
+                    setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]);
                 }
             }
 
@@ -349,37 +349,17 @@ namespace LinBox {
 
             // r <= Q + (R - A c) / p
 
-            std::cout << "A" << std::endl;
-            for (auto j = 0u; j < _n; ++j) {
-                for (auto i = 0u; i < _n; ++i) {
-                    logRNSMatrixElement(_rnsA, _n, i, j);
-                }
-            }
-
-            std::cout << "c" << std::endl;
-            for (auto j = 0u; j < _primesCount; ++j) {
-                for (auto i = 0u; i < _n; ++i) {
-                    logRNSMatrixElement(_rnsc, _n, i, j);
-                }
-            }
-
             // By first computing R <= R - A c as a fgemm within the RNS domain.
-            FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n,
-                         _rnsDomain->mOne, _rnsA, _n, _rnsc, _n, _rnsDomain->one,
-                         _rnsR, _n);
-
-            std::cout << "R = Ac" << std::endl;
-            for (auto j = 0u; j < _primesCount; ++j) {
-                for (auto i = 0u; i < _n; ++i) {
-                    logRNSMatrixElement(_rnsR, _n, i, j);
-                }
-            }
+            // @fixme Use parallel helper!
+            FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,
+                         _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one,
+                         _rnsR, _primesCount);
 
             // We divide each residues by the according pj, which is done by multiplying.
             // @fixme Could be done in parallel!
             for (auto j = 0u; j < _primesCount; ++j) {
                 for (auto i = 0u; i < _n; ++i) {
-                    auto& rnsElement = _rnsR[i * _n + j];
+                    auto& rnsElement = _rnsR[i * _primesCount + j];
                     auto stride = rnsElement._stride;
                     for (auto h = 0u; h < _rnsPrimesCount; ++h) {
                         auto& rnsF = _rnsSystem->_field_rns[h];

From c415b719c6545dcc3aa93efaa7c888d56cebb9bc Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Thu, 13 Jun 2019 16:14:14 +0200
Subject: [PATCH 27/63] Fixed wrong leading dimension for accessing residue
 element

---
 linbox/algorithms/multi-mod-lifting-container.h | 12 +++++++++---
 linbox/solutions/solve/solve-dixon-rns.h        |  4 ++--
 tests/test-solve-full.C                         |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index d350aec16..cf6269ea3 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -96,11 +96,12 @@ namespace LinBox {
         {
             linbox_check(A.rowdim() == A.coldim());
 
+            std::cout << "----------" << std::endl;
             A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
             std::cout << "b: " << b << std::endl;
 
             // This will contain the primes or our MultiMod basis
-            // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?)
+            // @fixme Pass the count through Method::DixonRNS (and rename it Method::DixonMultiMod?)
             _primesCount = 2;
             _primes.resize(_primesCount);
             std::cout << "primesCount: " << _primesCount << std::endl;
@@ -116,6 +117,7 @@ namespace LinBox {
                 double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n));
                 _rnsPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits());
                 _rnsPrimes.resize(_rnsPrimesCount);
+                std::cout << "primeGenerator.getBits(): " << primeGenerator.getBits() << std::endl;
                 std::cout << "rnsBasisPrimesCount: " << _rnsPrimesCount << std::endl;
 
                 std::vector<double> primes;
@@ -226,7 +228,11 @@ namespace LinBox {
                 double log2P = Givaro::logtwo(_primesProduct);
                 // _iterationsCount = log2(2 * N * D) / log2(p)
                 _log2Bound = hb.solutionLogBound;
-                _iterationsCount = std::ceil(_log2Bound / log2P);
+
+                // @fixme @cpernet @jgdumas Is this computation wrong?
+                // I have to increase the number of iterations when the bitsize of the vector
+                // is big, maybe there is something wrong with the Hadamard bound.
+                _iterationsCount = std::ceil(_log2Bound / log2P) + 2;
                 std::cout << "iterationsCount: " << _iterationsCount << std::endl;
 
                 // @fixme Fact is RationalReconstruction which needs numbound and denbound
@@ -376,7 +382,7 @@ namespace LinBox {
                 // r <- (R - Ac) / p
                 // @fixme @cpernet Don't know how to do that with one fconvert_rns!
                 for (auto i = 0u; i < _n; ++i) {
-                    FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _n + j));
+                    FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _primesCount + j));
                 }
 
                 // r <- Q + (R - Ac) / p
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 630ac235d..71faf0627 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -139,7 +139,7 @@ namespace LinBox {
     void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
                const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
     {
-        commentator().start("solve.dixon.integer.dense");
+        commentator().start("solve.dixon-rns.integer.dense");
 
         // @fixme We don't know if we can use ModularBalanced<double>,
         // because of the rational reconstruction which might be
@@ -151,7 +151,7 @@ namespace LinBox {
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);
 
-        commentator().stop("solve.dixon.integer.dense");
+        commentator().stop("solve.dixon-rns.integer.dense");
 
         // @fixme Implement something like that
         // if (status == SS_INCONSISTENT) {
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index 5b9e5acec..363e0a781 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -263,7 +263,7 @@ int main(int argc, char** argv)
     bool ok = true;
     do {
         // // ----- Rational Auto
-        ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // // @fixme Dixon<Wiedemann> does not compile
         // // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);

From 87dbe8b7caca7bebfb2a65fefe13dafb4a1f4b39 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 14 Jun 2019 16:23:59 +0200
Subject: [PATCH 28/63] Thanks to @jgdumas, now handling rational
 reconstruction with own num bound

---
 .../algorithms/multi-mod-lifting-container.h  | 32 ++++++---------
 .../rational-cra-builder-full-multip.h        | 23 ++++++++++-
 linbox/solutions/hadamard-bound.h             |  6 +--
 linbox/solutions/solve/solve-dixon-rns.h      | 40 ++++++++++++++++++-
 4 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index cf6269ea3..e2a4876ca 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -228,19 +228,15 @@ namespace LinBox {
                 double log2P = Givaro::logtwo(_primesProduct);
                 // _iterationsCount = log2(2 * N * D) / log2(p)
                 _log2Bound = hb.solutionLogBound;
-
-                // @fixme @cpernet @jgdumas Is this computation wrong?
-                // I have to increase the number of iterations when the bitsize of the vector
-                // is big, maybe there is something wrong with the Hadamard bound.
-                _iterationsCount = std::ceil(_log2Bound / log2P) + 2;
+                _log2NumBound = hb.numLogBound;
+                _log2DenBound = hb.denLogBound;
+                std::cout << "_log2Bound: " << _log2Bound << std::endl;
+                std::cout << "_log2NumBound: " << _log2NumBound << std::endl;
+                std::cout << "_log2DenBound: " << hb.denLogBound << std::endl;
+                std::cout << "log2P: " << log2P << std::endl;
+
+                _iterationsCount = std::ceil(_log2Bound / log2P);
                 std::cout << "iterationsCount: " << _iterationsCount << std::endl;
-
-                // @fixme Fact is RationalReconstruction which needs numbound and denbound
-                // expects them to be in non-log... @fixme Still needed?
-                _ring.init(_numbound, Integer(1)
-                                          << static_cast<uint64_t>(std::ceil(hb.numLogBound)));
-                _ring.init(_denbound, Integer(1)
-                                          << static_cast<uint64_t>(std::ceil(hb.denLogBound)));
             }
 
             //----- Locals setup
@@ -292,11 +288,9 @@ namespace LinBox {
         // ----- NOT LiftingContainer API
         // ----- but still needed
 
-        const IElement& numbound() const { return _numbound; }
-
-        const IElement& denbound() const { return _denbound; }
-
         double log2Bound() const { return _log2Bound; }
+        double log2NumBound() const { return _log2NumBound; }
+        double log2DenBound() const { return _log2DenBound; }
 
         uint32_t primesCount() const { return _primesCount; }
 
@@ -414,16 +408,16 @@ namespace LinBox {
             std::cout << " -> " << reconstructedInteger << std::endl;
         }
 
-    private:
+    public: // @fixme BACK TO PRIVATE!
         const Ring& _ring;
 
         // The problem: A^{-1} * b
         const IMatrix& _A;
         const IVector& _b;
 
-        IElement _numbound;
-        IElement _denbound;
         double _log2Bound;
+        double _log2NumBound;
+        double _log2DenBound;
 
         RNSSystem* _rnsSystem = nullptr;
         RNSDomain* _rnsDomain = nullptr;
diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h
index e1df25d35..30b38a412 100644
--- a/linbox/algorithms/rational-cra-builder-full-multip.h
+++ b/linbox/algorithms/rational-cra-builder-full-multip.h
@@ -65,11 +65,30 @@ namespace LinBox
             return num;
         }
 
+        template <class Vect>
+		Vect& result (Vect &num, Integer& den, const Integer& numBound, const Integer& denBound)
+		{
+            Father_t::result(num, false);
+            den = 1;
+            const auto& mod = Father_t::getModulus();
+            Integer nd;
+            for (auto num_it = num.begin(); num_it != num.end(); ++num_it) {
+                iterativeratrecon(*num_it, nd, den, mod, numBound, denBound);
+
+                if (nd > 1) {
+                    for (auto t02 = num.begin(); t02 != num_it; ++t02)
+                        *t02 *= nd;
+                    den *= nd;
+                }
+            }
+            return num;
+        }
+
 	protected:
-		Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& s)
+		Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn, const Integer& sd)
 		{
 			Integer a;
-			_ZZ.reconstructRational(a, new_den, u1*=old_den, m1, s);
+			_ZZ.reconstructRational(a, new_den, u1*=old_den, m1, sn, sd);
 			return u1=a;
 		}
 	};
diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h
index 48b9bcf55..a003aaf22 100644
--- a/linbox/solutions/hadamard-bound.h
+++ b/linbox/solutions/hadamard-bound.h
@@ -51,7 +51,7 @@ namespace LinBox {
         }
 #ifdef DEBUG_HADAMARD_BOUND
         std::clog << "normSquared:=" << normSquared << ';' << std::endl;
-        std::clog << "vectorLogNorm:=" << (Givaro::logtwo(normSquared) / 2.0) << ';' << std::endl;
+        std::clog << "vectorLogNorm:=" << Givaro::logtwo(normSquared) / 2.0 << ';' << std::endl;
 #endif
         logNorm = Givaro::logtwo(normSquared) / 2.0;
         return true;
@@ -423,9 +423,9 @@ namespace LinBox {
         double bLogNorm;
         vectorLogNorm(bLogNorm, b.begin(), b.end());
 
-        data.numLogBound = hadamardBound.logBoundOverMinNorm + bLogNorm + 1.0;
+        data.numLogBound = hadamardBound.logBoundOverMinNorm + bLogNorm;
         data.denLogBound = hadamardBound.logBound;
-        data.solutionLogBound = data.numLogBound + data.denLogBound + 1.0;
+        data.solutionLogBound = 1.0 + data.numLogBound + data.denLogBound; // log2(2 * N * D)
 
 #ifdef DEBUG_HADAMARD_BOUND
         std::clog << "numLogBound:=" << data.numLogBound << ';' << std::endl;
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 71faf0627..34c336db0 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -86,8 +86,41 @@ namespace LinBox {
                 craBuilder.progress(field, padicAccumulations[j]);
             }
 
+
+            for (auto j = 0u; j < _lc.primesCount(); ++j) {
+                auto Cj = padicAccumulations[j];
+                auto xxx = (_lc._A.getEntry(0, 0) * Cj[0] - _lc._b[0]) % radices[j];
+                std::cout << "xxx " << j << " " << xxx << std::endl;
+            }
+
             // Rational reconstruction
-            craBuilder.result(xNum, xDen);
+            Integer numBound = (Integer(1) << size_t(std::ceil(_lc.log2NumBound())));
+            Integer denBound = (Integer(1) << size_t(std::ceil(_lc.log2DenBound())));
+
+            // @todo @cleanup Do the same for denBound ?
+            // The following finds the closest Integer that satisfies 2 ^ exponent.
+            // This is done by dichotomy, going from floor to ceil.
+
+            Integer minNumBound = (Integer(1) << size_t(std::floor(_lc.log2NumBound())));
+            Integer maxNumBound = (Integer(1) << size_t(std::ceil(_lc.log2NumBound())));
+            auto middleNumBound = (minNumBound + maxNumBound);
+            double l = _lc.log2NumBound();
+            double lm = Givaro::logtwo(middleNumBound) - 1;
+            while (minNumBound < maxNumBound) {
+                if (lm > l) {
+                    maxNumBound = middleNumBound / 2;
+                }
+                else if (lm < l) {
+                    minNumBound = middleNumBound / 2;
+                }
+                else {
+                    break;
+                }
+                middleNumBound = (minNumBound + maxNumBound);
+                lm = Givaro::logtwo(middleNumBound) - 1;
+            }
+
+            craBuilder.result(xNum, xDen, middleNumBound / 2, denBound);
 
             return true;
         }
@@ -125,6 +158,11 @@ namespace LinBox {
             if (!re.getRational(xNum, xDen)) {
                 std::cerr << "OUCH!" << std::endl;
             }
+
+// #ifdef DEBUG_HADAMARD_BOUND
+            std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << ';' << std::endl;
+            std::clog << "denLog " << Givaro::logtwo(xDen) << ';' << std::endl;
+// #endif
         }
 
     private:

From 0e66c33c014e07f4de3577252f8b0615464429f3 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 18 Jun 2019 10:33:51 +0200
Subject: [PATCH 29/63] Fixed a bunch of cases with b very different from A

---
 linbox/solutions/solve/solve-dixon-rns.h | 93 ++++++++++++++++--------
 1 file changed, 64 insertions(+), 29 deletions(-)

diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 34c336db0..f4a4837cb 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -25,6 +25,65 @@
 #include <linbox/algorithms/multi-mod-lifting-container.h>
 
 namespace LinBox {
+    // @todo @cleanup Move that somewhere inside Givaro?
+    // Find the closest upper bound Integer that satisfies 2 ^ exponent.
+    // This is done by dichotomy, going from floor to ceil.
+    Integer twoPower(double exponent)
+    {
+        // @note Is the exponent is small, we will be extra precise,
+        // otherwise, we over estimate the exponent a bit,
+        // so that results are all right with rational reconstruction.
+        // The reason being that RR does has to be very precise for small
+        // values so that it does not go too far.
+        // And, RR also need to go far enough, the exponent not being very precise
+        // for big values.
+        // @fixme This is hard-coded... That's sad. What does this mean really?
+        if (exponent > 20.) {
+            exponent *= 1.0001;
+        }
+
+        Integer min = (Integer(1) << uint64_t(std::floor(exponent)));
+        Integer max = (Integer(1) << uint64_t(std::ceil(exponent)));
+
+        // To keep full precision, we do not divide by two here,
+        // but just the computed exponent.
+        Integer target = min + max;
+        Integer lastKnownTarget = target;
+        double targetExponent = 0.0;
+
+        while (min < max) {
+            targetExponent = Givaro::logtwo(target) - 1;
+            if (targetExponent > exponent) {
+                max = (target + 1) / 2;
+            }
+            else if (targetExponent < exponent) {
+                min = target / 2;
+            }
+            else {
+                break;
+            }
+
+            target = min + max;
+
+            // Get out if we're lock in an infinite loop
+            if (lastKnownTarget == target) {
+                break;
+            }
+            lastKnownTarget = target;
+        }
+
+        // Find the smallest value that satisfies the upper
+        // evaluation of the exponent.
+        if (Givaro::logtwo(min) >= exponent) {
+            return min;
+        } else if (Givaro::logtwo(target / 2) >= exponent) {
+            return target / 2;
+        }
+        else {
+            return max;
+        }
+    }
+
     /**
      * From a MultiModLiftingContainer, will build
      * the solution on each prime, then will do a CRT reconstruction,
@@ -86,7 +145,6 @@ namespace LinBox {
                 craBuilder.progress(field, padicAccumulations[j]);
             }
 
-
             for (auto j = 0u; j < _lc.primesCount(); ++j) {
                 auto Cj = padicAccumulations[j];
                 auto xxx = (_lc._A.getEntry(0, 0) * Cj[0] - _lc._b[0]) % radices[j];
@@ -94,33 +152,10 @@ namespace LinBox {
             }
 
             // Rational reconstruction
-            Integer numBound = (Integer(1) << size_t(std::ceil(_lc.log2NumBound())));
-            Integer denBound = (Integer(1) << size_t(std::ceil(_lc.log2DenBound())));
-
-            // @todo @cleanup Do the same for denBound ?
-            // The following finds the closest Integer that satisfies 2 ^ exponent.
-            // This is done by dichotomy, going from floor to ceil.
-
-            Integer minNumBound = (Integer(1) << size_t(std::floor(_lc.log2NumBound())));
-            Integer maxNumBound = (Integer(1) << size_t(std::ceil(_lc.log2NumBound())));
-            auto middleNumBound = (minNumBound + maxNumBound);
-            double l = _lc.log2NumBound();
-            double lm = Givaro::logtwo(middleNumBound) - 1;
-            while (minNumBound < maxNumBound) {
-                if (lm > l) {
-                    maxNumBound = middleNumBound / 2;
-                }
-                else if (lm < l) {
-                    minNumBound = middleNumBound / 2;
-                }
-                else {
-                    break;
-                }
-                middleNumBound = (minNumBound + maxNumBound);
-                lm = Givaro::logtwo(middleNumBound) - 1;
-            }
+            Integer numBound = twoPower(_lc.log2NumBound());
+            Integer denBound = twoPower(_lc.log2DenBound());
 
-            craBuilder.result(xNum, xDen, middleNumBound / 2, denBound);
+            craBuilder.result(xNum, xDen, numBound, denBound);
 
             return true;
         }
@@ -159,10 +194,10 @@ namespace LinBox {
                 std::cerr << "OUCH!" << std::endl;
             }
 
-// #ifdef DEBUG_HADAMARD_BOUND
+            // #ifdef DEBUG_HADAMARD_BOUND
             std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << ';' << std::endl;
             std::clog << "denLog " << Givaro::logtwo(xDen) << ';' << std::endl;
-// #endif
+            // #endif
         }
 
     private:

From 5f1b54a4ec55d571966492d39a1483411806b5aa Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 18 Jun 2019 15:00:17 +0200
Subject: [PATCH 30/63] Switched to exact value for HadamardBound, simplifying
 the rat recon step

---
 .../algorithms/multi-mod-lifting-container.h  |  31 +-
 .../rational-cra-builder-full-multip.h        |  12 +-
 linbox/solutions/hadamard-bound.h             | 321 +++++++++---------
 linbox/solutions/solve/solve-dixon-rns.h      |  89 +----
 4 files changed, 215 insertions(+), 238 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index e2a4876ca..910110d03 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -153,7 +153,7 @@ namespace LinBox {
                     if (bitSize > rnsBasisBitSize && h > 0) {
                         _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (h - 1));
                         _rnsPrimesCount -= h;
-                        std::cout << "RNS basis: Erasing extra " << h << "primes." << std::endl;
+                        std::cout << "RNS basis: Erasing extra " << h << " primes." << std::endl;
                         break;
                     }
                 }
@@ -228,11 +228,11 @@ namespace LinBox {
                 double log2P = Givaro::logtwo(_primesProduct);
                 // _iterationsCount = log2(2 * N * D) / log2(p)
                 _log2Bound = hb.solutionLogBound;
-                _log2NumBound = hb.numLogBound;
-                _log2DenBound = hb.denLogBound;
+                _numBound = hb.numBound;
+                _denBound = hb.denBound;
                 std::cout << "_log2Bound: " << _log2Bound << std::endl;
-                std::cout << "_log2NumBound: " << _log2NumBound << std::endl;
-                std::cout << "_log2DenBound: " << hb.denLogBound << std::endl;
+                std::cout << "_numBound: " << _numBound << std::endl;
+                std::cout << "_denBound: " << _denBound << std::endl;
                 std::cout << "log2P: " << log2P << std::endl;
 
                 _iterationsCount = std::ceil(_log2Bound / log2P);
@@ -289,8 +289,8 @@ namespace LinBox {
         // ----- but still needed
 
         double log2Bound() const { return _log2Bound; }
-        double log2NumBound() const { return _log2NumBound; }
-        double log2DenBound() const { return _log2DenBound; }
+        Integer numBound() const { return _numBound; }
+        Integer denBound() const { return _denBound; }
 
         uint32_t primesCount() const { return _primesCount; }
 
@@ -320,6 +320,8 @@ namespace LinBox {
                     // @fixme @cpernet Is this OK for any Ring or should we be sure we are using
                     // Integers?
                     _ring.quoRem(Q[i], R[i], r[i], pj);
+                    // std::cout << "Q" << j << " " << Q[i] << std::endl;
+                    // std::cout << "R" << j << " " << R[i] << std::endl;
                 }
 
                 // Convert R to the field
@@ -336,6 +338,9 @@ namespace LinBox {
                 // would do the trick
                 digits[j] = IVector(_ring, Fc);
 
+                // auto ooo = (_A.getEntry(0, 0) * Integer(digits[j][0]) - r[0]) % Integer(pj);
+                // std::cout << "ooo " << j << " " << ooo << std::endl;
+
                 // Store the very same result in an RNS system,
                 // but fact is all the primes of the RNS system are bigger
                 // than the modulus used to compute _Fc, we just copy the result for everybody.
@@ -373,6 +378,9 @@ namespace LinBox {
                 auto& r = _r[j];
                 auto& Q = _Q[j];
 
+                // std::cout << "old r" << j << " " << r[0] << std::endl;
+                // std::cout << "r" << j << " " << (r[0] - _A.getEntry(0, 0) * Integer(_Fc[j][0])) / Integer(_primes[j])  << " expected" << std::endl;
+
                 // r <- (R - Ac) / p
                 // @fixme @cpernet Don't know how to do that with one fconvert_rns!
                 for (auto i = 0u; i < _n; ++i) {
@@ -380,7 +388,12 @@ namespace LinBox {
                 }
 
                 // r <- Q + (R - Ac) / p
+                // std::cout << "p" << j << " " << Integer(_primes[j]) << std::endl;
+                // std::cout << "c" << j << " " << Integer(_Fc[j][0]) << std::endl;
+
                 IVD.addin(r, Q);
+
+                // std::cout << "r" << j << " " << r[0] << std::endl;
             }
 
             ++_position;
@@ -416,8 +429,8 @@ namespace LinBox {
         const IVector& _b;
 
         double _log2Bound;
-        double _log2NumBound;
-        double _log2DenBound;
+        Integer _numBound;
+        Integer _denBound;
 
         RNSSystem* _rnsSystem = nullptr;
         RNSDomain* _rnsDomain = nullptr;
diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h
index 30b38a412..fb62b3941 100644
--- a/linbox/algorithms/rational-cra-builder-full-multip.h
+++ b/linbox/algorithms/rational-cra-builder-full-multip.h
@@ -68,6 +68,9 @@ namespace LinBox
         template <class Vect>
 		Vect& result (Vect &num, Integer& den, const Integer& numBound, const Integer& denBound)
 		{
+            // std::cout << "numBound " << numBound << std::endl;
+            // std::cout << "denBound " << denBound << std::endl;
+
             Father_t::result(num, false);
             den = 1;
             const auto& mod = Father_t::getModulus();
@@ -87,9 +90,12 @@ namespace LinBox
 	protected:
 		Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn, const Integer& sd)
 		{
-			Integer a;
-			_ZZ.reconstructRational(a, new_den, u1*=old_den, m1, sn, sd);
-			return u1=a;
+            // @note This interface of the rational does the RatRecon.
+            Givaro::Rational myRational(u1 *= old_den, m1, sn, false);
+
+            u1 = myRational.nume();
+            new_den = myRational.deno();
+			return u1;
 		}
 	};
 }
diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h
index a003aaf22..891dbcd5f 100644
--- a/linbox/solutions/hadamard-bound.h
+++ b/linbox/solutions/hadamard-bound.h
@@ -33,120 +33,114 @@ namespace LinBox {
 
     // ----- Vector norm
 
-    // Returns false if the vector is null, true otherwise
     template <class ConstIterator>
-    bool vectorLogNorm(double& logNorm, const ConstIterator& begin, const ConstIterator& end)
+    void vectorNormSquared(Integer& normSquared, const ConstIterator& begin,
+                           const ConstIterator& end)
     {
-        Integer normSquared = 0;
+        normSquared = 0;
         for (ConstIterator it = begin; it != end; ++it) {
             // Whatever field element it is,
             // it should be able to store the square without
             // loss of information.
             normSquared += (*it) * (*it);
         }
-
-        if (normSquared == 0) {
-            logNorm = 0.0;
-            return false; // Vector is zero
-        }
-#ifdef DEBUG_HADAMARD_BOUND
-        std::clog << "normSquared:=" << normSquared << ';' << std::endl;
-        std::clog << "vectorLogNorm:=" << Givaro::logtwo(normSquared) / 2.0 << ';' << std::endl;
-#endif
-        logNorm = Givaro::logtwo(normSquared) / 2.0;
-        return true;
     }
 
     // ----- Detailed Hadamard bound
 
-    struct HadamardLogBoundDetails {
+    struct HadamarBoundDetails {
         /**
-         * Bit size of the minimal hadamard bound
+         * The minimal hadamard bound
          * between the row-wise and the col-wise ones.
          *
          * min { HadamardRow(A), HadamardCol(A) }
          */
-        double logBound;
+        Integer bound;
+
         /**
-         * Bit size of the minimal hadamard bound
+         * The minimal hadamard bound
          * divided by the min of the norm vectors
          * between the row-wise and the col-wise ones.
          *
          * min { HadamardRow(A) / min || Ai,* ||,
          *       HadamardCol(A) / min || A*,j ||  }
          */
-        double logBoundOverMinNorm;
+        Integer boundOverMinNorm;
     };
 
     /**
      * Precise Hadamard bound (bound on determinant) by taking
      * the row-wise euclidean norm.
-     *
-     * The result is expressed as bit size.
      */
     template <class IMatrix>
-    void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A)
+    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A)
     {
         typename MatrixTraits<IMatrix>::MatrixCategory tag;
-        HadamardRowLogBound(logBound, minLogNorm, A, tag);
+        HadamardRowBound(bound, minNormSquared, A, tag);
     }
 
     template <class IMatrix>
-    void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag)
+    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+                          const MatrixCategories::RowColMatrixTag& tag)
     {
-        logBound = 0.0;
-        minLogNorm = std::numeric_limits<double>::infinity();
+        bound = 1;
+        minNormSquared = -1;
 
         for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) {
-            double rowLogNorm;
-            if (vectorLogNorm(rowLogNorm, rowIt->begin(), rowIt->end())) {
-                if (rowLogNorm < minLogNorm) {
-                    minLogNorm = rowLogNorm;
-                }
-            }
-            else {
-                logBound = 0.0;
-                minLogNorm = 0.0;
+            Integer rowNormSquared;
+            vectorNormSquared(rowNormSquared, rowIt->begin(), rowIt->end());
+
+            if (rowNormSquared == 0) {
+                bound = 0;
+                minNormSquared = 0;
                 return;
             }
-            logBound += rowLogNorm;
+
+            if (minNormSquared < 0 || rowNormSquared < minNormSquared) {
+                minNormSquared = rowNormSquared;
+            }
+
+            bound *= rowNormSquared;
         }
+
+        bound = Givaro::sqrt(bound);
     }
 
     template <class IMatrix>
-    void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowMatrixTag& tag)
+    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+                          const MatrixCategories::RowMatrixTag& tag)
     {
-        logBound = 0.0;
-        minLogNorm = std::numeric_limits<double>::infinity();
+        bound = 1;
+        minNormSquared = -1;
 
         for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) {
             Integer normSquared = 0;
             for (const auto& pair : *rowIt) {
                 normSquared += (pair.second) * (pair.second);
             }
+
             if (normSquared == 0) {
-                logBound = 0.0;
-                minLogNorm = 0.0;
+                bound = 0;
+                minNormSquared = 0;
                 return;
             }
 
-            double logNormSquared = Givaro::logtwo(normSquared);
-            if (logNormSquared < minLogNorm) {
-                minLogNorm = logNormSquared;
+            if (minNormSquared < 0 || normSquared < minNormSquared) {
+                minNormSquared = normSquared;
             }
-            logBound += logNormSquared;
+
+            bound *= normSquared;
         }
 
-        // Square-root
-        logBound /= 2.0;
-        minLogNorm /= 2.0;
+        bound = Givaro::sqrt(bound);
     }
 
     template <class IMatrix>
-    void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::BlackboxTag& tag)
+    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+                          const MatrixCategories::BlackboxTag& tag)
     {
         DenseMatrix<typename IMatrix::Field> ACopy(A);
-        HadamardRowLogBound(logBound, minLogNorm, ACopy);
+        HadamardRowBound(bound, minNormSquared, ACopy);
     }
 
     /**
@@ -156,40 +150,46 @@ namespace LinBox {
      * The result is expressed as bit size.
      */
     template <class IMatrix>
-    void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A)
+    void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A)
     {
         typename MatrixTraits<IMatrix>::MatrixCategory tag;
-        HadamardColLogBound(logBound, minLogNorm, A, tag);
+        HadamardColBound(bound, minNormSquared, A, tag);
     }
 
     template <class IMatrix>
-    void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag)
+    void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+                          const MatrixCategories::RowColMatrixTag& tag)
     {
-        logBound = 0.0;
-        minLogNorm = std::numeric_limits<double>::infinity();
+        bound = 1;
+        minNormSquared = -1;
 
         typename IMatrix::ConstColIterator colIt;
         for (colIt = A.colBegin(); colIt != A.colEnd(); ++colIt) {
-            double colLogNorm;
-            if (vectorLogNorm(colLogNorm, colIt->begin(), colIt->end())) {
-                if (colLogNorm < minLogNorm) {
-                    minLogNorm = colLogNorm;
-                }
-            }
-            else {
-                logBound = 0.0;
-                minLogNorm = 0.0;
+            Integer colNormSquared;
+            vectorNormSquared(colNormSquared, colIt->begin(), colIt->end());
+
+            if (colNormSquared == 0) {
+                bound = 0;
+                minNormSquared = 0;
                 return;
             }
-            logBound += colLogNorm;
+
+            if (minNormSquared < 0 || colNormSquared < minNormSquared) {
+                minNormSquared = colNormSquared;
+            }
+
+            bound *= colNormSquared;
         }
+
+        bound = Givaro::sqrt(bound);
     }
 
     template <class IMatrix>
-    void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowMatrixTag& tag)
+    void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+                          const MatrixCategories::RowMatrixTag& tag)
     {
-        logBound = 0.0;
-        minLogNorm = std::numeric_limits<double>::infinity();
+        bound = 1;
+        minNormSquared = -1;
 
         // This vector contains the norm squared for each columns.
         std::vector<Integer> columnsNormsSquared(A.coldim());
@@ -200,30 +200,31 @@ namespace LinBox {
         }
 
         // All the norms have been computed, we check which one is the smallest
-        // and compute the product (aka sum bitsize-wise) of them to make the logBound.
+        // and compute the product (aka sum bitsize-wise) of them to make the bound.
         for (const Integer& normSquared : columnsNormsSquared) {
             if (normSquared == 0) {
-                logBound = 0.0;
-                minLogNorm = 0.0;
+                bound = 0;
+                minNormSquared = 0;
                 return;
             }
-            double logNormSquared = Givaro::logtwo(normSquared);
-            if (logNormSquared < minLogNorm) {
-                minLogNorm = logNormSquared;
+
+            if (minNormSquared < 0 || normSquared < minNormSquared) {
+                minNormSquared = normSquared;
             }
-            logBound += logNormSquared;
+
+            bound *= normSquared;
         }
 
         // Square-root
-        logBound /= 2.0;
-        minLogNorm /= 2.0;
+        bound = Givaro::sqrt(bound);
     }
 
     template <class IMatrix>
-    void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::BlackboxTag& tag)
+    void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+                          const MatrixCategories::BlackboxTag& tag)
     {
         DenseMatrix<typename IMatrix::Field> ACopy(A);
-        HadamardColLogBound(logBound, minLogNorm, ACopy);
+        HadamardColBound(bound, minNormSquared, ACopy);
     }
 
     /**
@@ -233,34 +234,35 @@ namespace LinBox {
      * The results are expressed as bit size.
      */
     template <class IMatrix>
-    HadamardLogBoundDetails DetailedHadamardBound(const IMatrix& A)
+    HadamarBoundDetails DetailedHadamardBound(const IMatrix& A)
     {
-        double rowLogBound = 0.0;
-        double rowMinLogNorm = 0.0;
-        HadamardRowLogBound(rowLogBound, rowMinLogNorm, A);
-        double rowLogBoundOverMinNorm = rowLogBound - rowMinLogNorm;
+        Integer rowBound;
+        Integer rowMinNormSquared;
+        HadamardRowBound(rowBound, rowMinNormSquared, A);
+        Integer rowBoundOverMinNorm = rowBound / Givaro::sqrt(rowMinNormSquared);
 #ifdef DEBUG_HADAMARD_BOUND
-        std::clog << "rowLogBound:=" << rowLogBound << ';' << std::endl;
-        std::clog << "rowMinLogNorm:=" << rowMinLogNorm << ';' << std::endl;
-        std::clog << "rowLogBoundOverMinNorm:=" << rowLogBoundOverMinNorm << ';' << std::endl;
+        std::clog << "rowBound:=" << rowBound << ';' << std::endl;
+        std::clog << "rowMinNormSquared:=" << rowMinNormSquared << ';' << std::endl;
+        std::clog << "rowBoundOverMinNorm:=" << rowBoundOverMinNorm << ';' << std::endl;
 #endif
 
-        double colLogBound = 0.0;
-        double colMinLogNorm = 0.0;
-        HadamardColLogBound(colLogBound, colMinLogNorm, A);
-        double colLogBoundOverMinNorm = colLogBound - colMinLogNorm;
+        Integer colBound;
+        Integer colMinNormSquared;
+        HadamardColBound(colBound, colMinNormSquared, A);
+        Integer colBoundOverMinNorm = colBound / Givaro::sqrt(colMinNormSquared);
 #ifdef DEBUG_HADAMARD_BOUND
-        std::clog << "colLogBound:=" << colLogBound << ';' << std::endl;
-        std::clog << "colMinLogNorm:=" << colMinLogNorm << ';' << std::endl;
-        std::clog << "colLogBoundOverMinNorm:=" << colLogBoundOverMinNorm << ';' << std::endl;
+        std::clog << "colBound:=" << colBound << ';' << std::endl;
+        std::clog << "colMinNormSquared:=" << colMinNormSquared << ';' << std::endl;
+        std::clog << "colBoundOverMinNorm:=" << colBoundOverMinNorm << ';' << std::endl;
 #endif
 
-        HadamardLogBoundDetails data;
-        data.logBound = std::min(rowLogBound, colLogBound);
-        data.logBoundOverMinNorm = std::min(rowLogBoundOverMinNorm, colLogBoundOverMinNorm);
+        HadamarBoundDetails data;
+        data.bound = (rowBound < colBound) ? rowBound : colBound;
+        data.boundOverMinNorm =
+            (rowBoundOverMinNorm < colBoundOverMinNorm) ? rowBoundOverMinNorm : colBoundOverMinNorm;
 #ifdef DEBUG_HADAMARD_BOUND
-        std::clog << "logBound:=" << data.logBound << ';' << std::endl;
-        std::clog << "logBoundOverMinNorm:=" << data.logBoundOverMinNorm << ';' << std::endl;
+        std::clog << "bound:=" << data.bound << ';' << std::endl;
+        std::clog << "boundOverMinNorm:=" << data.boundOverMinNorm << ';' << std::endl;
 #endif
 
         return data;
@@ -277,13 +279,14 @@ namespace LinBox {
     template <class IMatrix>
     double HadamardBound(const IMatrix& A)
     {
-        return DetailedHadamardBound(A).logBound;
+        return DetailedHadamardBound(A).bound;
     }
 
     // ----- Fast Hadamard bound
 
     template <class IMatrix>
-    inline Integer& InfinityNorm(Integer& max, const IMatrix& A) {
+    inline Integer& InfinityNorm(Integer& max, const IMatrix& A)
+    {
         typename MatrixTraits<IMatrix>::MatrixCategory tag;
         return InfinityNorm(max, A, tag);
     }
@@ -299,7 +302,8 @@ namespace LinBox {
     }
 
     template <class IMatrix>
-    inline Integer& InfinityNorm(Integer& max, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag)
+    inline Integer& InfinityNorm(Integer& max, const IMatrix& A,
+                                 const MatrixCategories::RowColMatrixTag& tag)
     {
         max = 0;
         for (auto it = A.Begin(); it != A.End(); ++it) {
@@ -313,95 +317,96 @@ namespace LinBox {
         return max;
     }
 
-     /**
-      * Returns the bit size of the Hadamard bound.
-      * This is a larger estimation but faster to compute.
-      */
+    /**
+     * Returns the bit size of the Hadamard bound.
+     * This is a larger estimation but faster to compute.
+     */
     template <class IMatrix>
-    inline double FastHadamardBound(const IMatrix& A, const Integer& infnorm)
+    inline double FastHadamardLogBound(const IMatrix& A, const Integer& infinityNorm)
     {
-        if (infnorm == 0) {
+        if (infinityNorm == 0) {
             return 0.0;
         }
 
         uint64_t n = std::max(A.rowdim(), A.coldim());
-        double logBound = static_cast<double>(n) * (Givaro::logtwo(n) / 2.0 + Givaro::logtwo(infnorm));
-        return logBound;
+        double bound =
+            static_cast<double>(n) * (Givaro::logtwo(n) / 2.0 + Givaro::logtwo(infinityNorm));
+        return bound;
     }
 
     template <class IMatrix>
-    inline double FastHadamardBound(const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag)
+    inline double FastHadamardLogBound(const IMatrix& A,
+                                       const MatrixCategories::RowColMatrixTag& tag)
     {
-        Integer infnorm;
-        InfinityNorm(infnorm, A, tag);
-        return FastHadamardBound(A, infnorm);
+        Integer infinityNorm;
+        InfinityNorm(infinityNorm, A, tag);
+        return FastHadamardLogBound(A, infinityNorm);
     }
 
     template <class IMatrix>
-    inline double FastHadamardBound(const IMatrix& A, const MatrixCategories::BlackboxTag& tag)
+    inline double FastHadamardLogBound(const IMatrix& A, const MatrixCategories::BlackboxTag& tag)
     {
         DenseMatrix<typename IMatrix::Field> ACopy(A);
-        return FastHadamardBound(ACopy);
+        return FastHadamardLogBound(ACopy);
     }
 
     template <class IMatrix>
-    inline double FastHadamardBound(const IMatrix& A)
+    inline double FastHadamardLogBound(const IMatrix& A)
     {
         typename MatrixTraits<IMatrix>::MatrixCategory tag;
-        return FastHadamardBound(A, tag);
+        return FastHadamardLogBound(A, tag);
     }
 
-        /**
-         * Bound on the coefficients of the characteristic polynomial
-         * @bib "Efficient Computation of the Characteristic Polynomial". Dumas Pernet Wan ISSAC'05.
-         *
-         */
+    /**
+     * Bound on the coefficients of the characteristic polynomial
+     * @bib "Efficient Computation of the Characteristic Polynomial". Dumas Pernet Wan ISSAC'05.
+     *
+     */
     template <class IMatrix>
-    inline double FastCharPolyDumasPernetWanBound(const IMatrix& A, const Integer& infnorm)
+    inline double FastCharPolyDumasPernetWanBound(const IMatrix& A, const Integer& infinityNorm)
     {
-		// .105815875 = 0.21163275 / 2
-        return FastHadamardBound(A, infnorm) + A.coldim()*.105815875;
+        // .105815875 = 0.21163275 / 2
+        return FastHadamardLogBound(A, infinityNorm) + A.coldim() * .105815875;
     }
 
-        /**
-         * A.J. Goldstein et R.L. Graham.
-         * A Hadamard-type bound on the coefficients of
-         * a determinant of polynomials.
-         * SIAM Review, volume 15, 1973, pages 657-658.
-         *
-         */
+    /**
+     * A.J. Goldstein et R.L. Graham.
+     * A Hadamard-type bound on the coefficients of
+     * a determinant of polynomials.
+     * SIAM Review, volume 15, 1973, pages 657-658.
+     *
+     */
     template <class IMatrix>
-    inline double FastCharPolyGoldsteinGrahamBound(const IMatrix& A, const Integer& infnorm)
+    inline double FastCharPolyGoldsteinGrahamBound(const IMatrix& A, const Integer& infinityNorm)
     {
-        Integer ggb(infnorm);
+        Integer ggb(infinityNorm);
         ggb *= static_cast<uint64_t>(A.coldim());
         ggb += 2;
-        ggb *= infnorm;
+        ggb *= infinityNorm;
         ++ggb;
-        return Givaro::logtwo(ggb)*A.coldim()/2.0;
+        return Givaro::logtwo(ggb) * A.coldim() / 2.0;
     }
 
     template <class IMatrix>
     inline double FastCharPolyHadamardBound(const IMatrix& A)
     {
         typename MatrixTraits<IMatrix>::MatrixCategory tag;
-        Integer infnorm;
-        InfinityNorm(infnorm, A, tag);
-        const double DPWbound = FastCharPolyDumasPernetWanBound(A, infnorm);
-        const double GGbound = FastCharPolyGoldsteinGrahamBound(A, infnorm);
+        Integer infinityNorm;
+        InfinityNorm(infinityNorm, A, tag);
+        const double DPWbound = FastCharPolyDumasPernetWanBound(A, infinityNorm);
+        const double GGbound = FastCharPolyGoldsteinGrahamBound(A, infinityNorm);
 #ifdef DEBUG_HADAMARD_BOUND
         std::clog << "DPWbound: " << DPWbound << std::endl;
         std::clog << "GGbound : " << GGbound << std::endl;
 #endif
-        return std::min(DPWbound,GGbound);
+        return std::min(DPWbound, GGbound);
     }
 
-
     // ----- Rational solve bound
 
     struct RationalSolveHadamardBoundData {
-        double numLogBound;      // log2(N)
-        double denLogBound;      // log2(D)
+        Integer numBound;        // N
+        Integer denBound;        // D
         double solutionLogBound; // log2(2 * N * D)
     };
 
@@ -413,30 +418,38 @@ namespace LinBox {
      * @note Matrix and Vector should be over Integer.
      */
     template <class Matrix, class Vector>
-    typename std::enable_if<std::is_same<typename FieldTraits<typename Matrix::Field>::categoryTag, RingCategories::IntegerTag>::value,
+    typename std::enable_if<std::is_same<typename FieldTraits<typename Matrix::Field>::categoryTag,
+                                         RingCategories::IntegerTag>::value,
                             RationalSolveHadamardBoundData>::type
     RationalSolveHadamardBound(const Matrix& A, const Vector& b)
     {
         RationalSolveHadamardBoundData data;
 
         auto hadamardBound = DetailedHadamardBound(A);
-        double bLogNorm;
-        vectorLogNorm(bLogNorm, b.begin(), b.end());
+        Integer bNormSquared;
+        vectorNormSquared(bNormSquared, b.begin(), b.end());
 
-        data.numLogBound = hadamardBound.logBoundOverMinNorm + bLogNorm;
-        data.denLogBound = hadamardBound.logBound;
-        data.solutionLogBound = 1.0 + data.numLogBound + data.denLogBound; // log2(2 * N * D)
+        data.denBound = hadamardBound.bound;
+        data.numBound = hadamardBound.boundOverMinNorm * Givaro::sqrt(bNormSquared);
+        if (data.denBound == 0 || data.numBound == 0) {
+            data.solutionLogBound = 0.0;
+        }
+        else {
+            data.solutionLogBound = 1.0 + Givaro::logtwo(data.numBound)
+                                    + Givaro::logtwo(data.denBound); // log2(2 * N * D)
+        }
 
 #ifdef DEBUG_HADAMARD_BOUND
-        std::clog << "numLogBound:=" << data.numLogBound << ';' << std::endl;
-        std::clog << "denLogBound:=" << data.denLogBound << ';' << std::endl;
+        std::clog << "numBound:=" << data.numBound << ';' << std::endl;
+        std::clog << "denBound:=" << data.denBound << ';' << std::endl;
 #endif
         return data;
     }
 
     /// @fixme Needed to solve-cra.h, but can't be used yet.
     template <class Matrix, class Vector>
-    typename std::enable_if<std::is_same<typename FieldTraits<typename Matrix::Field>::categoryTag, RingCategories::RationalTag>::value,
+    typename std::enable_if<std::is_same<typename FieldTraits<typename Matrix::Field>::categoryTag,
+                                         RingCategories::RationalTag>::value,
                             RationalSolveHadamardBoundData>::type
     RationalSolveHadamardBound(const Matrix& A, const Vector& b)
     {
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index f4a4837cb..3154c1915 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -25,65 +25,6 @@
 #include <linbox/algorithms/multi-mod-lifting-container.h>
 
 namespace LinBox {
-    // @todo @cleanup Move that somewhere inside Givaro?
-    // Find the closest upper bound Integer that satisfies 2 ^ exponent.
-    // This is done by dichotomy, going from floor to ceil.
-    Integer twoPower(double exponent)
-    {
-        // @note Is the exponent is small, we will be extra precise,
-        // otherwise, we over estimate the exponent a bit,
-        // so that results are all right with rational reconstruction.
-        // The reason being that RR does has to be very precise for small
-        // values so that it does not go too far.
-        // And, RR also need to go far enough, the exponent not being very precise
-        // for big values.
-        // @fixme This is hard-coded... That's sad. What does this mean really?
-        if (exponent > 20.) {
-            exponent *= 1.0001;
-        }
-
-        Integer min = (Integer(1) << uint64_t(std::floor(exponent)));
-        Integer max = (Integer(1) << uint64_t(std::ceil(exponent)));
-
-        // To keep full precision, we do not divide by two here,
-        // but just the computed exponent.
-        Integer target = min + max;
-        Integer lastKnownTarget = target;
-        double targetExponent = 0.0;
-
-        while (min < max) {
-            targetExponent = Givaro::logtwo(target) - 1;
-            if (targetExponent > exponent) {
-                max = (target + 1) / 2;
-            }
-            else if (targetExponent < exponent) {
-                min = target / 2;
-            }
-            else {
-                break;
-            }
-
-            target = min + max;
-
-            // Get out if we're lock in an infinite loop
-            if (lastKnownTarget == target) {
-                break;
-            }
-            lastKnownTarget = target;
-        }
-
-        // Find the smallest value that satisfies the upper
-        // evaluation of the exponent.
-        if (Givaro::logtwo(min) >= exponent) {
-            return min;
-        } else if (Givaro::logtwo(target / 2) >= exponent) {
-            return target / 2;
-        }
-        else {
-            return max;
-        }
-    }
-
     /**
      * From a MultiModLiftingContainer, will build
      * the solution on each prime, then will do a CRT reconstruction,
@@ -105,6 +46,15 @@ namespace LinBox {
 
         bool getRational(IVector& xNum, IElement& xDen)
         {
+            // Early out when the numerator is bounded by zero.
+            if (_lc.numBound() == 0) {
+                for (auto i = 0u; i < _lc.length(); ++i) {
+                    _lc.ring().assign(xNum[i], _lc.ring().zero);
+                }
+                _lc.ring().assign(xDen, _lc.ring().one);
+                return true;
+            }
+
             VectorDomain<Ring> IVD(_lc.ring());
 
             // Stores each c0 + c1 pj + ... + ck pj^k for each pj
@@ -127,6 +77,9 @@ namespace LinBox {
                 for (auto j = 0u; j < _lc.primesCount(); ++j) {
                     IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
                     _lc.ring().mulin(radices[j], _lc.prime(j));
+                    auto xxx = (_lc._A.getEntry(0, 0) * padicAccumulations[j][0] - _lc._b[0]) % radices[j];
+                    // std::cout << "xxx " << j << "." << i << " " << _lc._A.getEntry(0, 0) << " * " << padicAccumulations[j][0] << " - " << _lc._b[0] << " mod " << radices[j] << std::endl;
+                    std::cout << "xxx " << j << "." << i << " " << xxx << std::endl;
                 }
             }
 
@@ -145,17 +98,9 @@ namespace LinBox {
                 craBuilder.progress(field, padicAccumulations[j]);
             }
 
-            for (auto j = 0u; j < _lc.primesCount(); ++j) {
-                auto Cj = padicAccumulations[j];
-                auto xxx = (_lc._A.getEntry(0, 0) * Cj[0] - _lc._b[0]) % radices[j];
-                std::cout << "xxx " << j << " " << xxx << std::endl;
-            }
-
             // Rational reconstruction
-            Integer numBound = twoPower(_lc.log2NumBound());
-            Integer denBound = twoPower(_lc.log2DenBound());
-
-            craBuilder.result(xNum, xDen, numBound, denBound);
+            // @note RR expects the bounds to be strict, this is why we add a + 1
+            craBuilder.result(xNum, xDen, _lc.numBound() + 1, _lc.denBound() + 1);
 
             return true;
         }
@@ -195,8 +140,8 @@ namespace LinBox {
             }
 
             // #ifdef DEBUG_HADAMARD_BOUND
-            std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << ';' << std::endl;
-            std::clog << "denLog " << Givaro::logtwo(xDen) << ';' << std::endl;
+            std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << " " << xNum[0] << ';' << std::endl;
+            std::clog << "denLog " << Givaro::logtwo(xDen) << " " << xDen << ';' << std::endl;
             // #endif
         }
 
@@ -219,7 +164,7 @@ namespace LinBox {
         // implicitly requiring 0-{p-1} representation of the p-adic sequence elements.
         using Field = Givaro::Modular<double>;
         using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
-        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
+        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()), 12); // @fixme REMOVE SEED
 
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);

From d614db18e40550a1345ed577267804b04e018471 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 19 Jun 2019 15:52:26 +0200
Subject: [PATCH 31/63] Fixed upstream problem by adding more primes to the RNS
 base. Fixed Hadamard bound.

---
 .../algorithms/multi-mod-lifting-container.h  | 41 ++++------
 .../rational-cra-builder-full-multip.h        | 12 +--
 linbox/solutions/hadamard-bound.h             | 81 +++++++++++--------
 linbox/solutions/solve/solve-dixon-rns.h      | 11 +--
 4 files changed, 73 insertions(+), 72 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 910110d03..4fb588f3d 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -110,15 +110,18 @@ namespace LinBox {
             Integer infinityNormA;
             InfinityNorm(infinityNormA, A);
             double logInfinityNormA = Givaro::logtwo(infinityNormA);
+            std::cout << "infinityNormA: " << infinityNormA << std::endl;
+            std::cout << "logInfinityNormA: " << logInfinityNormA << std::endl;
 
             {
                 // Based on Chen-Storjohann's paper, this is the bit size
                 // of the needed RNS basis for the residue computation
-                double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n));
-                _rnsPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits());
+                double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n)); // @fixme @jgdumas Is this OK, then?
+                _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1));
                 _rnsPrimes.resize(_rnsPrimesCount);
                 std::cout << "primeGenerator.getBits(): " << primeGenerator.getBits() << std::endl;
-                std::cout << "rnsBasisPrimesCount: " << _rnsPrimesCount << std::endl;
+                std::cout << "rnsBasisBitSize: " << rnsBasisBitSize << std::endl;
+                std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
 
                 std::vector<double> primes;
                 for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) {
@@ -148,12 +151,13 @@ namespace LinBox {
                 // as the first count was just an upper estimation.
                 double bitSize = 0.0;
                 for (int h = _rnsPrimes.size() - 1; h >= 0; --h) {
-                    bitSize += Givaro::logtwo(primes[h]);
+                    bitSize += Givaro::logtwo(_rnsPrimes[h]);
 
                     if (bitSize > rnsBasisBitSize && h > 0) {
-                        _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (h - 1));
+                        _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + h);
                         _rnsPrimesCount -= h;
                         std::cout << "RNS basis: Erasing extra " << h << " primes." << std::endl;
+                        std::cout << _rnsPrimes.size() << std::endl;
                         break;
                     }
                 }
@@ -325,8 +329,7 @@ namespace LinBox {
                 }
 
                 // Convert R to the field
-                // @fixme @cpernet Could this step be ignored?
-                // If not, put that in already allocated memory, and not use a temporary here.
+                // @fixme Put that FVector in already allocated memory, and not use a temporary here.
                 auto& F = _fields[j];
                 FVector FR(F, R); // rebind
 
@@ -338,9 +341,6 @@ namespace LinBox {
                 // would do the trick
                 digits[j] = IVector(_ring, Fc);
 
-                // auto ooo = (_A.getEntry(0, 0) * Integer(digits[j][0]) - r[0]) % Integer(pj);
-                // std::cout << "ooo " << j << " " << ooo << std::endl;
-
                 // Store the very same result in an RNS system,
                 // but fact is all the primes of the RNS system are bigger
                 // than the modulus used to compute _Fc, we just copy the result for everybody.
@@ -378,22 +378,13 @@ namespace LinBox {
                 auto& r = _r[j];
                 auto& Q = _Q[j];
 
-                // std::cout << "old r" << j << " " << r[0] << std::endl;
-                // std::cout << "r" << j << " " << (r[0] - _A.getEntry(0, 0) * Integer(_Fc[j][0])) / Integer(_primes[j])  << " expected" << std::endl;
-
                 // r <- (R - Ac) / p
                 // @fixme @cpernet Don't know how to do that with one fconvert_rns!
                 for (auto i = 0u; i < _n; ++i) {
                     FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _primesCount + j));
                 }
 
-                // r <- Q + (R - Ac) / p
-                // std::cout << "p" << j << " " << Integer(_primes[j]) << std::endl;
-                // std::cout << "c" << j << " " << Integer(_Fc[j][0]) << std::endl;
-
                 IVD.addin(r, Q);
-
-                // std::cout << "r" << j << " " << r[0] << std::endl;
             }
 
             ++_position;
@@ -403,21 +394,23 @@ namespace LinBox {
     private:
         // Helper function, setting all residues of a matrix element to the very same value.
         // This doesn't check the moduli.
-        void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j,
+        inline void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j,
                                             double value)
         {
-            auto stride = A[i * lda + j]._stride;
+            auto& Aij = A[i * lda + j];
+            auto stride = Aij._stride;
             for (auto h = 0u; h < _rnsPrimesCount; ++h) {
-                A[i * lda + j]._ptr[h * stride] = value;
+                Aij._ptr[h * stride] = value;
             }
         }
 
-        void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j)
+        inline void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j)
         {
+            auto& Aij = A[i * lda + j];
             Integer reconstructedInteger;
             FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &reconstructedInteger, 1, A + (i * lda + j));
             std::cout << i << " " << j << " ";
-            _rnsDomain->write(std::cout, A[i * lda + j]);
+            _rnsDomain->write(std::cout, Aij);
             std::cout << " -> " << reconstructedInteger << std::endl;
         }
 
diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h
index fb62b3941..6359c694e 100644
--- a/linbox/algorithms/rational-cra-builder-full-multip.h
+++ b/linbox/algorithms/rational-cra-builder-full-multip.h
@@ -66,17 +66,14 @@ namespace LinBox
         }
 
         template <class Vect>
-		Vect& result (Vect &num, Integer& den, const Integer& numBound, const Integer& denBound)
+		Vect& result (Vect &num, Integer& den, const Integer& numBound)
 		{
-            // std::cout << "numBound " << numBound << std::endl;
-            // std::cout << "denBound " << denBound << std::endl;
-
             Father_t::result(num, false);
             den = 1;
             const auto& mod = Father_t::getModulus();
             Integer nd;
             for (auto num_it = num.begin(); num_it != num.end(); ++num_it) {
-                iterativeratrecon(*num_it, nd, den, mod, numBound, denBound);
+                iterativeratrecon(*num_it, nd, den, mod, numBound);
 
                 if (nd > 1) {
                     for (auto t02 = num.begin(); t02 != num_it; ++t02)
@@ -88,11 +85,10 @@ namespace LinBox
         }
 
 	protected:
-		Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn, const Integer& sd)
+		Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn)
 		{
             // @note This interface of the rational does the RatRecon.
-            Givaro::Rational myRational(u1 *= old_den, m1, sn, false);
-
+            Givaro::Rational myRational(Integer::modin(u1 *= old_den, m1), m1, sn);
             u1 = myRational.nume();
             new_den = myRational.deno();
 			return u1;
diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h
index 891dbcd5f..00fe6e92e 100644
--- a/linbox/solutions/hadamard-bound.h
+++ b/linbox/solutions/hadamard-bound.h
@@ -73,18 +73,17 @@ namespace LinBox {
      * the row-wise euclidean norm.
      */
     template <class IMatrix>
-    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A)
+    void HadamardRowBound(Integer& bound, const IMatrix& A)
     {
         typename MatrixTraits<IMatrix>::MatrixCategory tag;
-        HadamardRowBound(bound, minNormSquared, A, tag);
+        HadamardRowBound(bound, A, tag);
     }
 
     template <class IMatrix>
-    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+    void HadamardRowBound(Integer& bound, const IMatrix& A,
                           const MatrixCategories::RowColMatrixTag& tag)
     {
         bound = 1;
-        minNormSquared = -1;
 
         for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) {
             Integer rowNormSquared;
@@ -92,26 +91,25 @@ namespace LinBox {
 
             if (rowNormSquared == 0) {
                 bound = 0;
-                minNormSquared = 0;
                 return;
             }
 
-            if (minNormSquared < 0 || rowNormSquared < minNormSquared) {
-                minNormSquared = rowNormSquared;
-            }
-
             bound *= rowNormSquared;
         }
 
-        bound = Givaro::sqrt(bound);
+        // Square-root (upper bound)
+        Integer rem;
+        bound = Givaro::sqrtrem(bound, rem);
+        if (rem != 0) {
+            bound += 1;
+        }
     }
 
     template <class IMatrix>
-    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+    void HadamardRowBound(Integer& bound, const IMatrix& A,
                           const MatrixCategories::RowMatrixTag& tag)
     {
         bound = 1;
-        minNormSquared = -1;
 
         for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) {
             Integer normSquared = 0;
@@ -121,26 +119,26 @@ namespace LinBox {
 
             if (normSquared == 0) {
                 bound = 0;
-                minNormSquared = 0;
                 return;
             }
 
-            if (minNormSquared < 0 || normSquared < minNormSquared) {
-                minNormSquared = normSquared;
-            }
-
             bound *= normSquared;
         }
 
-        bound = Givaro::sqrt(bound);
+        // Square-root (upper bound)
+        Integer rem;
+        bound = Givaro::sqrtrem(bound, rem);
+        if (rem != 0) {
+            bound += 1;
+        }
     }
 
     template <class IMatrix>
-    void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A,
+    void HadamardRowBound(Integer& bound, const IMatrix& A,
                           const MatrixCategories::BlackboxTag& tag)
     {
         DenseMatrix<typename IMatrix::Field> ACopy(A);
-        HadamardRowBound(bound, minNormSquared, ACopy);
+        HadamardRowBound(bound, ACopy);
     }
 
     /**
@@ -181,7 +179,12 @@ namespace LinBox {
             bound *= colNormSquared;
         }
 
-        bound = Givaro::sqrt(bound);
+        // Square-root (upper bound)
+        Integer rem;
+        bound = Givaro::sqrtrem(bound, rem);
+        if (rem != 0) {
+            bound += 1;
+        }
     }
 
     template <class IMatrix>
@@ -215,8 +218,12 @@ namespace LinBox {
             bound *= normSquared;
         }
 
-        // Square-root
-        bound = Givaro::sqrt(bound);
+        // Square-root (upper bound)
+        Integer rem;
+        bound = Givaro::sqrtrem(bound, rem);
+        if (rem != 0) {
+            bound += 1;
+        }
     }
 
     template <class IMatrix>
@@ -236,20 +243,24 @@ namespace LinBox {
     template <class IMatrix>
     HadamarBoundDetails DetailedHadamardBound(const IMatrix& A)
     {
+        // @note We can't use the rowBoundOverMinNorm because
+        // the rational solve Hadamard bound uses it for the numerator bound.
+
         Integer rowBound;
-        Integer rowMinNormSquared;
-        HadamardRowBound(rowBound, rowMinNormSquared, A);
-        Integer rowBoundOverMinNorm = rowBound / Givaro::sqrt(rowMinNormSquared);
+        HadamardRowBound(rowBound, A);
 #ifdef DEBUG_HADAMARD_BOUND
         std::clog << "rowBound:=" << rowBound << ';' << std::endl;
-        std::clog << "rowMinNormSquared:=" << rowMinNormSquared << ';' << std::endl;
-        std::clog << "rowBoundOverMinNorm:=" << rowBoundOverMinNorm << ';' << std::endl;
 #endif
 
+        Integer rem;
         Integer colBound;
         Integer colMinNormSquared;
         HadamardColBound(colBound, colMinNormSquared, A);
-        Integer colBoundOverMinNorm = colBound / Givaro::sqrt(colMinNormSquared);
+        Integer colBoundOverMinNorm;
+        Integer::divmod(colBoundOverMinNorm, rem, colBound, Givaro::sqrt(colMinNormSquared));
+        if (rem != 0) {
+            colBoundOverMinNorm += 1;
+        }
 #ifdef DEBUG_HADAMARD_BOUND
         std::clog << "colBound:=" << colBound << ';' << std::endl;
         std::clog << "colMinNormSquared:=" << colMinNormSquared << ';' << std::endl;
@@ -258,8 +269,7 @@ namespace LinBox {
 
         HadamarBoundDetails data;
         data.bound = (rowBound < colBound) ? rowBound : colBound;
-        data.boundOverMinNorm =
-            (rowBoundOverMinNorm < colBoundOverMinNorm) ? rowBoundOverMinNorm : colBoundOverMinNorm;
+        data.boundOverMinNorm = colBoundOverMinNorm;
 #ifdef DEBUG_HADAMARD_BOUND
         std::clog << "bound:=" << data.bound << ';' << std::endl;
         std::clog << "boundOverMinNorm:=" << data.boundOverMinNorm << ';' << std::endl;
@@ -429,8 +439,15 @@ namespace LinBox {
         Integer bNormSquared;
         vectorNormSquared(bNormSquared, b.begin(), b.end());
 
+        // Square-root of bNormSquared (upper bound)
+        Integer rem;
+        Integer bNorm = Givaro::sqrtrem(bNormSquared, rem);
+        if (rem != 0) {
+            bNorm += 1;
+        }
+
         data.denBound = hadamardBound.bound;
-        data.numBound = hadamardBound.boundOverMinNorm * Givaro::sqrt(bNormSquared);
+        data.numBound = hadamardBound.boundOverMinNorm * bNorm;
         if (data.denBound == 0 || data.numBound == 0) {
             data.solutionLogBound = 0.0;
         }
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 3154c1915..a3130dacb 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -77,9 +77,6 @@ namespace LinBox {
                 for (auto j = 0u; j < _lc.primesCount(); ++j) {
                     IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
                     _lc.ring().mulin(radices[j], _lc.prime(j));
-                    auto xxx = (_lc._A.getEntry(0, 0) * padicAccumulations[j][0] - _lc._b[0]) % radices[j];
-                    // std::cout << "xxx " << j << "." << i << " " << _lc._A.getEntry(0, 0) << " * " << padicAccumulations[j][0] << " - " << _lc._b[0] << " mod " << radices[j] << std::endl;
-                    std::cout << "xxx " << j << "." << i << " " << xxx << std::endl;
                 }
             }
 
@@ -100,7 +97,7 @@ namespace LinBox {
 
             // Rational reconstruction
             // @note RR expects the bounds to be strict, this is why we add a + 1
-            craBuilder.result(xNum, xDen, _lc.numBound() + 1, _lc.denBound() + 1);
+            craBuilder.result(xNum, xDen, _lc.numBound() + 1);
 
             return true;
         }
@@ -139,10 +136,8 @@ namespace LinBox {
                 std::cerr << "OUCH!" << std::endl;
             }
 
-            // #ifdef DEBUG_HADAMARD_BOUND
-            std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << " " << xNum[0] << ';' << std::endl;
-            std::clog << "denLog " << Givaro::logtwo(xDen) << " " << xDen << ';' << std::endl;
-            // #endif
+            std::cout << "numLog " << xNum << std::endl;
+            std::cout << "denLog " << xDen << std::endl;
         }
 
     private:

From 179f5776815c7a8ab5dbf78dec299095f63831bb Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Thu, 20 Jun 2019 11:02:43 +0200
Subject: [PATCH 32/63] Added DixonRNS to benchmark-dense-solve

---
 benchmarks/benchmark-dense-solve.C            |  1 +
 linbox/algorithms/lifting-container.h         |  6 +-
 .../algorithms/multi-mod-lifting-container.h  | 55 ++++++-------------
 linbox/solutions/methods.h                    |  2 +-
 linbox/solutions/solve/solve-dixon-rns.h      |  3 -
 tests/test-solve-full.C                       |  6 ++
 6 files changed, 28 insertions(+), 45 deletions(-)

diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C
index 504cb69e0..a678c52f0 100644
--- a/benchmarks/benchmark-dense-solve.C
+++ b/benchmarks/benchmark-dense-solve.C
@@ -114,6 +114,7 @@ void benchmark(std::pair<double, double>& timebits, Arguments& args, MethodBase&
     else if (args.methodString == "DenseElimination")       solve(X, A, B, Method::DenseElimination(method));
     else if (args.methodString == "SparseElimination")      solve(X, A, B, Method::SparseElimination(method));
     else if (args.methodString == "Dixon")                  solve(X, A, B, Method::Dixon(method));
+    else if (args.methodString == "DixonRNS")               solve(X, A, B, Method::DixonRNS(method));
     else if (args.methodString == "CRA")                    solve(X, A, B, Method::CRAAuto(method));
     else if (args.methodString == "SymbolicNumericOverlap") solve(X, A, B, Method::SymbolicNumericOverlap(method));
     else if (args.methodString == "SymbolicNumericNorm")    solve(X, A, B, Method::SymbolicNumericNorm(method));
diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h
index 81c992bc5..01d7d5260 100644
--- a/linbox/algorithms/lifting-container.h
+++ b/linbox/algorithms/lifting-container.h
@@ -153,13 +153,13 @@ namespace LinBox
             this->_intRing.convert(Prime,_p);
 
             auto hb = RationalSolveHadamardBound(A, b);
-            N = Integer(1) << static_cast<uint64_t>(std::ceil(hb.numLogBound));
-            D = Integer(1) << static_cast<uint64_t>(std::ceil(hb.denLogBound));
+            N = hb.numBound;
+            D = hb.denBound;
 
             // L = N * D * 2
             // _length = logp(L, Prime) = log2(L) * ln(2) / ln(Prime)
             double primeLog2 = Givaro::logtwo(Prime);
-            _length = std::ceil((1 + hb.numLogBound + hb.denLogBound) / primeLog2); // round up instead of down
+            _length = std::ceil(hb.solutionLogBound / primeLog2); // round up instead of down
 #ifdef DEBUG_LC
 			std::cout<<" norms computed, p = "<<_p<<"\n";
 			std::cout<<" N = "<<N<<", D = "<<D<<", length = "<<_length<<"\n";
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 4fb588f3d..0469baada 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -96,22 +96,18 @@ namespace LinBox {
         {
             linbox_check(A.rowdim() == A.coldim());
 
-            std::cout << "----------" << std::endl;
-            A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
-            std::cout << "b: " << b << std::endl;
+            // std::cout << "----------" << std::endl;
+            // A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
+            // std::cout << "b: " << b << std::endl;
 
             // This will contain the primes or our MultiMod basis
-            // @fixme Pass the count through Method::DixonRNS (and rename it Method::DixonMultiMod?)
-            _primesCount = 2;
+            _primesCount = m.primesCount;
             _primes.resize(_primesCount);
-            std::cout << "primesCount: " << _primesCount << std::endl;
 
             // Some preparation work
             Integer infinityNormA;
             InfinityNorm(infinityNormA, A);
             double logInfinityNormA = Givaro::logtwo(infinityNormA);
-            std::cout << "infinityNormA: " << infinityNormA << std::endl;
-            std::cout << "logInfinityNormA: " << logInfinityNormA << std::endl;
 
             {
                 // Based on Chen-Storjohann's paper, this is the bit size
@@ -119,9 +115,7 @@ namespace LinBox {
                 double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n)); // @fixme @jgdumas Is this OK, then?
                 _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1));
                 _rnsPrimes.resize(_rnsPrimesCount);
-                std::cout << "primeGenerator.getBits(): " << primeGenerator.getBits() << std::endl;
-                std::cout << "rnsBasisBitSize: " << rnsBasisBitSize << std::endl;
-                std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
+                // std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
 
                 std::vector<double> primes;
                 for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) {
@@ -156,25 +150,15 @@ namespace LinBox {
                     if (bitSize > rnsBasisBitSize && h > 0) {
                         _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + h);
                         _rnsPrimesCount -= h;
-                        std::cout << "RNS basis: Erasing extra " << h << " primes." << std::endl;
                         std::cout << _rnsPrimes.size() << std::endl;
                         break;
                     }
                 }
             }
 
-            // Generating primes
-            // @fixme Cleanup, might not be needed
-            {
-                IElement iTmp;
-                _ring.assign(_primesProduct, _ring.one);
-                for (auto& pj : _primes) {
-                    _fields.emplace_back(pj);
-                    _ring.init(iTmp, pj);
-                    _ring.mulin(_primesProduct, iTmp);
-                }
-
-                std::cout << "primesProduct: " << _primesProduct << std::endl;
+            // Setting fields up
+            for (auto& pj : _primes) {
+                _fields.emplace_back(pj);
             }
 
             // Initialize all inverses
@@ -228,19 +212,18 @@ namespace LinBox {
 
             // Compute how many iterations are needed
             {
+                double log2PrimesProduct = 0.0;
+                for (auto& pj : _primes) {
+                    log2PrimesProduct += Givaro::logtwo(Integer(pj));
+                }
+
                 auto hb = RationalSolveHadamardBound(A, b);
-                double log2P = Givaro::logtwo(_primesProduct);
-                // _iterationsCount = log2(2 * N * D) / log2(p)
                 _log2Bound = hb.solutionLogBound;
                 _numBound = hb.numBound;
                 _denBound = hb.denBound;
-                std::cout << "_log2Bound: " << _log2Bound << std::endl;
-                std::cout << "_numBound: " << _numBound << std::endl;
-                std::cout << "_denBound: " << _denBound << std::endl;
-                std::cout << "log2P: " << log2P << std::endl;
 
-                _iterationsCount = std::ceil(_log2Bound / log2P);
-                std::cout << "iterationsCount: " << _iterationsCount << std::endl;
+                // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...)
+                _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct);
             }
 
             //----- Locals setup
@@ -282,11 +265,8 @@ namespace LinBox {
         /// The dimension of the problem/solution.
         size_t size() const final { return _n; }
 
-        /**
-         * We are compliant to the interface even though
-         * p is multi-modular and thus not a prime per se.
-         */
-        const IElement& prime() const final { return _primesProduct; }
+        /// @note Useless, but in the API.
+        const IElement& prime() const final { return _ring.one; }
 
         // ------------------------------
         // ----- NOT LiftingContainer API
@@ -435,7 +415,6 @@ namespace LinBox {
         // Stores the inverse of pj of the i-th RNS prime into _primesRNSInverses[j][i]
         std::vector<std::vector<FElement>> _primesRNSInverses;
 
-        IElement _primesProduct;       // The global modulus for lifting: a multiple of all _primes.
         std::vector<FElement> _primes; // @fixme We might want something else as a type!
         std::vector<double> _rnsPrimes;
         // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h
index 0fe110c8e..1de057ec2 100644
--- a/linbox/solutions/methods.h
+++ b/linbox/solutions/methods.h
@@ -221,7 +221,7 @@ namespace LinBox {
         SingularSolutionType singularSolutionType = SingularSolutionType::Random;
 
         // ----- For DixonRNS method.
-        uint32_t primeBaseLength = 16u; //!< How many primes to use lifting will be done over p = p1p2...pl.
+        uint32_t primesCount = 16u; //!< How many primes to use lifting will be done over p = p1p2...pl.
 
         // ----- For random-based systems.
         size_t trialsBeforeFailure = LINBOX_DEFAULT_TRIALS_BEFORE_FAILURE; //!< Maximum number of trials before giving up.
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index a3130dacb..58da58468 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -135,9 +135,6 @@ namespace LinBox {
             if (!re.getRational(xNum, xDen)) {
                 std::cerr << "OUCH!" << std::endl;
             }
-
-            std::cout << "numLog " << xNum << std::endl;
-            std::cout << "denLog " << xDen << std::endl;
         }
 
     private:
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index 363e0a781..e1d993223 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -205,6 +205,7 @@ int main(int argc, char** argv)
     Integer q = 131071;
     bool verbose = false;
     bool loop = false;
+    int primesCount = -1;
     int seed = -1;
     int bitSize = 10;
     int vectorBitSize = -1;
@@ -214,6 +215,7 @@ int main(int argc, char** argv)
 
     static Argument args[] = {
         {'q', "-q", "Field characteristic.", TYPE_INTEGER, &q},
+        {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &primesCount},
         {'v', "-v", "Enable verbose mode.", TYPE_BOOL, &verbose},
         {'l', "-l", "Infinite loop of tests.", TYPE_BOOL, &loop},
         {'s', "-s", "Seed for randomness.", TYPE_INT, &seed},
@@ -244,6 +246,10 @@ int main(int argc, char** argv)
         return EXIT_FAILURE;
     }
 
+    if (primesCount > 0) {
+        method.primesCount = primesCount;
+    }
+
     if (vectorBitSize < 0) {
         vectorBitSize = bitSize;
     }

From 790f82da6e97bd52fa226fbc81b6e4d1f24baff4 Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Thu, 20 Jun 2019 17:49:39 +0200
Subject: [PATCH 33/63] Instrumented for precise timings

---
 linbox/algorithms/multi-mod-lifting-container.h | 15 ++++++++++++---
 linbox/solutions/solve/solve-dixon-rns.h        |  9 +++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 0469baada..b6080ab0d 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -150,7 +150,6 @@ namespace LinBox {
                     if (bitSize > rnsBasisBitSize && h > 0) {
                         _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + h);
                         _rnsPrimesCount -= h;
-                        std::cout << _rnsPrimes.size() << std::endl;
                         break;
                     }
                 }
@@ -291,7 +290,10 @@ namespace LinBox {
         {
             VectorDomain<Ring> IVD(_ring);
 
+            commentator().start("[MultiModLifting] nextDigit");
+
             // @fixme Should be done in parallel!
+            commentator().start("[MultiModLifting] Computing c");
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
                 auto& r = _r[j];
@@ -329,6 +331,7 @@ namespace LinBox {
                     setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]);
                 }
             }
+            commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
 
             // ----- Compute the next residues!
 
@@ -336,12 +339,15 @@ namespace LinBox {
 
             // By first computing R <= R - A c as a fgemm within the RNS domain.
             // @fixme Use parallel helper!
+            commentator().start("[MultiModLifting] FGEMM R <= R - Ac");
             FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,
                          _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one,
                          _rnsR, _primesCount);
+            commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
 
             // We divide each residues by the according pj, which is done by multiplying.
             // @fixme Could be done in parallel!
+            commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
             for (auto j = 0u; j < _primesCount; ++j) {
                 for (auto i = 0u; i < _n; ++i) {
                     auto& rnsElement = _rnsR[i * _primesCount + j];
@@ -352,8 +358,10 @@ namespace LinBox {
                     }
                 }
             }
+            commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
 
             // @fixme Could be done in parallel!
+            commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto& r = _r[j];
                 auto& Q = _Q[j];
@@ -366,8 +374,10 @@ namespace LinBox {
 
                 IVD.addin(r, Q);
             }
+            commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
+
+            commentator().stop("[MultiModLifting] nextDigit");
 
-            ++_position;
             return true;
         }
 
@@ -433,6 +443,5 @@ namespace LinBox {
                                  // HAS TO BE A MATRIX for gemm.
         std::vector<FVector>
             _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later.
-        size_t _position;
     };
 }
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 58da58468..541cec859 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -55,6 +55,7 @@ namespace LinBox {
                 return true;
             }
 
+            commentator().start("[MultiModLifting] Lifting");
             VectorDomain<Ring> IVD(_lc.ring());
 
             // Stores each c0 + c1 pj + ... + ck pj^k for each pj
@@ -79,8 +80,10 @@ namespace LinBox {
                     _lc.ring().mulin(radices[j], _lc.prime(j));
                 }
             }
+            commentator().stop("[MultiModLifting] Lifting");
 
             // CRT reconstruction from paddicAccumulations
+            commentator().start("[MultiModLifting] CRT Reconstruction");
             using CRAField = Givaro::Modular<Integer>;
             RationalCRABuilderFullMultip<CRAField> craBuilder(_lc.log2Bound()
                                                               / 1.4427); // 1.4427 = 1 / log(2)
@@ -94,10 +97,13 @@ namespace LinBox {
                 CRAField field(radices[j]);
                 craBuilder.progress(field, padicAccumulations[j]);
             }
+            commentator().stop("[MultiModLifting] CRT Reconstruction");
 
             // Rational reconstruction
             // @note RR expects the bounds to be strict, this is why we add a + 1
+            commentator().start("[MultiModLifting] Rational Reconstruction");
             craBuilder.result(xNum, xDen, _lc.numBound() + 1);
+            commentator().stop("[MultiModLifting] Rational Reconstruction");
 
             return true;
         }
@@ -129,8 +135,11 @@ namespace LinBox {
             linbox_check(A.rowdim() == A.coldim());
 
             using LiftingContainer = MultiModLiftingContainer<Field, Ring, PrimeGenerator>;
+
+            commentator().start("[MultiModLifting] Init");
             LiftingContainer lc(_ring, _primeGenerator, A, b, m);
             MultiModRationalReconstruction<LiftingContainer> re(lc);
+            commentator().stop("[MultiModLifting] Init");
 
             if (!re.getRational(xNum, xDen)) {
                 std::cerr << "OUCH!" << std::endl;

From 7d161c7f73c98832b35219916e3c4087773e30b2 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 21 Jun 2019 15:08:14 +0200
Subject: [PATCH 34/63] Parallel convert + fgemm

---
 linbox/algorithms/lifting-container.h         |  1 +
 .../algorithms/multi-mod-lifting-container.h  | 73 ++++++++++---------
 .../rational-cra-builder-full-multip.h        |  8 +-
 linbox/solutions/solve/solve-dixon-rns.h      |  8 +-
 tests/test-solve-full.C                       |  6 +-
 5 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h
index 19370890d..9664f75f4 100644
--- a/linbox/algorithms/lifting-container.h
+++ b/linbox/algorithms/lifting-container.h
@@ -162,6 +162,7 @@ namespace LinBox
             // _length = logp(L, Prime) = log2(L) * ln(2) / ln(Prime)
             double primeLog2 = Givaro::logtwo(Prime);
             _length = std::ceil(hb.solutionLogBound / primeLog2); // round up instead of down
+			std::cout << "_length "<< _length << std::endl;
 #ifdef DEBUG_LC
 			std::cout<<" norms computed, p = "<<_p<<"\n";
 			std::cout<<" N = "<<N<<", D = "<<D<<", length = "<<_length<<"\n";
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index b6080ab0d..ca2649df5 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -112,7 +112,7 @@ namespace LinBox {
             {
                 // Based on Chen-Storjohann's paper, this is the bit size
                 // of the needed RNS basis for the residue computation
-                double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n)); // @fixme @jgdumas Is this OK, then?
+                double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n));
                 _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1));
                 _rnsPrimes.resize(_rnsPrimesCount);
                 // std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
@@ -213,7 +213,7 @@ namespace LinBox {
             {
                 double log2PrimesProduct = 0.0;
                 for (auto& pj : _primes) {
-                    log2PrimesProduct += Givaro::logtwo(Integer(pj));
+                    log2PrimesProduct += Givaro::logtwo(pj);
                 }
 
                 auto hb = RationalSolveHadamardBound(A, b);
@@ -223,6 +223,7 @@ namespace LinBox {
 
                 // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...)
                 _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct);
+                std::cout << "_iterationsCount " << _iterationsCount << std::endl;
             }
 
             //----- Locals setup
@@ -231,6 +232,7 @@ namespace LinBox {
             _Q.reserve(_primesCount);
             _R.reserve(_primesCount);
             _Fc.reserve(_primesCount);
+            _FR.reserve(_primesCount);
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto& F = _fields[j];
 
@@ -238,6 +240,7 @@ namespace LinBox {
                 _Q.emplace_back(_ring, _n);
                 _R.emplace_back(_ring, _n);
                 _Fc.emplace_back(F, _n);
+                _FR.emplace_back(F, _n);
 
                 // Initialize all residues to b
                 _r.back() = _b; // Copying data
@@ -290,10 +293,8 @@ namespace LinBox {
         {
             VectorDomain<Ring> IVD(_ring);
 
-            commentator().start("[MultiModLifting] nextDigit");
-
-            // @fixme Should be done in parallel!
-            commentator().start("[MultiModLifting] Computing c");
+            // commentator().start("[MultiModLifting] Computing c");
+            #pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
                 auto& r = _r[j];
@@ -303,25 +304,22 @@ namespace LinBox {
                 // @note There is no VectorDomain::divmod yet.
                 // Euclidian division so that rj = pj Qj + Rj
                 for (auto i = 0u; i < _n; ++i) {
-                    // @fixme @cpernet Is this OK for any Ring or should we be sure we are using
-                    // Integers?
                     _ring.quoRem(Q[i], R[i], r[i], pj);
-                    // std::cout << "Q" << j << " " << Q[i] << std::endl;
-                    // std::cout << "R" << j << " " << R[i] << std::endl;
                 }
 
                 // Convert R to the field
-                // @fixme Put that FVector in already allocated memory, and not use a temporary here.
                 auto& F = _fields[j];
-                FVector FR(F, R); // rebind
-
+                auto& FR = _FR[j];
+                auto& digit = digits[j];
                 auto& B = _B[j];
                 auto& Fc = _Fc[j];
+                // @fixme Am I copying the data an extra time?
+                FR = FVector(F, R); // rebind
                 B.apply(Fc, FR);
 
                 // @fixme We might not need to store digits into IVectors, and returning _Fc
                 // would do the trick
-                digits[j] = IVector(_ring, Fc);
+                digit = IVector(_ring, Fc);
 
                 // Store the very same result in an RNS system,
                 // but fact is all the primes of the RNS system are bigger
@@ -331,23 +329,32 @@ namespace LinBox {
                     setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]);
                 }
             }
-            commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
+            // commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
 
             // ----- Compute the next residues!
 
             // r <= Q + (R - A c) / p
 
             // By first computing R <= R - A c as a fgemm within the RNS domain.
-            // @fixme Use parallel helper!
-            commentator().start("[MultiModLifting] FGEMM R <= R - Ac");
-            FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,
-                         _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one,
-                         _rnsR, _primesCount);
-            commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
+            PAR_BLOCK
+            {
+                using RNSParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::RNSModulus, FFLAS::StrategyParameter::Threads>;
+                using FGEMMSequential = FFLAS::ParSeqHelper::Sequential;
+                using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RNSParallel, FGEMMSequential>;
+                using MMHelper = FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;
+                ComposedParSeqHelper composedParSeqHelper(4, 4);
+                MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);
+
+                FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,
+                            _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one,
+                            _rnsR, _primesCount, mmHelper);
+            }
+            // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
 
             // We divide each residues by the according pj, which is done by multiplying.
             // @fixme Could be done in parallel!
-            commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
+            // @fixme @cpernet Don't know why, can't make it parallel!
+            // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
             for (auto j = 0u; j < _primesCount; ++j) {
                 for (auto i = 0u; i < _n; ++i) {
                     auto& rnsElement = _rnsR[i * _primesCount + j];
@@ -358,10 +365,10 @@ namespace LinBox {
                     }
                 }
             }
-            commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
+            // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
 
-            // @fixme Could be done in parallel!
-            commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
+            // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
+            #pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto& r = _r[j];
                 auto& Q = _Q[j];
@@ -369,14 +376,13 @@ namespace LinBox {
                 // r <- (R - Ac) / p
                 // @fixme @cpernet Don't know how to do that with one fconvert_rns!
                 for (auto i = 0u; i < _n; ++i) {
-                    FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _primesCount + j));
+                    FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1,
+                                        _rnsR + (i * _primesCount + j));
                 }
 
                 IVD.addin(r, Q);
             }
-            commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
-
-            commentator().stop("[MultiModLifting] nextDigit");
+            // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
 
             return true;
         }
@@ -385,7 +391,7 @@ namespace LinBox {
         // Helper function, setting all residues of a matrix element to the very same value.
         // This doesn't check the moduli.
         inline void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j,
-                                            double value)
+                                                   double value)
         {
             auto& Aij = A[i * lda + j];
             auto stride = Aij._stride;
@@ -439,9 +445,8 @@ namespace LinBox {
         std::vector<IVector> _r; // @todo Could be a matrix? Might not be useful, as it is never
                                  // used directly in computations.
         std::vector<IVector> _Q;
-        std::vector<IVector> _R; // @fixme This one should be expressed in a RNS system q, and
-                                 // HAS TO BE A MATRIX for gemm.
-        std::vector<FVector>
-            _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later.
+        std::vector<IVector> _R; // Will be inited to RNS within _rnsR
+        std::vector<FVector> _Fc;
+        std::vector<FVector> _FR;
     };
 }
diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h
index 6359c694e..6a191bf6f 100644
--- a/linbox/algorithms/rational-cra-builder-full-multip.h
+++ b/linbox/algorithms/rational-cra-builder-full-multip.h
@@ -68,7 +68,11 @@ namespace LinBox
         template <class Vect>
 		Vect& result (Vect &num, Integer& den, const Integer& numBound)
 		{
+            commentator().start("[RationalCRABuilderFullMultip] CRT Reconstruction");
             Father_t::result(num, false);
+            commentator().stop("[RationalCRABuilderFullMultip] CRT Reconstruction");
+
+            commentator().start("[RationalCRABuilderFullMultip] Rational Reconstruction");
             den = 1;
             const auto& mod = Father_t::getModulus();
             Integer nd;
@@ -81,6 +85,7 @@ namespace LinBox
                     den *= nd;
                 }
             }
+            commentator().stop("[RationalCRABuilderFullMultip] Rational Reconstruction");
             return num;
         }
 
@@ -88,7 +93,8 @@ namespace LinBox
 		Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn)
 		{
             // @note This interface of the rational does the RatRecon.
-            Givaro::Rational myRational(Integer::modin(u1 *= old_den, m1), m1, sn);
+            Integer::modin(u1 *= old_den, m1);
+            Givaro::Rational myRational(u1, m1, sn);
             u1 = myRational.nume();
             new_den = myRational.deno();
 			return u1;
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 541cec859..08fa84355 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -83,7 +83,7 @@ namespace LinBox {
             commentator().stop("[MultiModLifting] Lifting");
 
             // CRT reconstruction from paddicAccumulations
-            commentator().start("[MultiModLifting] CRT Reconstruction");
+            commentator().start("[MultiModLifting] CRT Reconstruction Progress");
             using CRAField = Givaro::Modular<Integer>;
             RationalCRABuilderFullMultip<CRAField> craBuilder(_lc.log2Bound()
                                                               / 1.4427); // 1.4427 = 1 / log(2)
@@ -97,13 +97,11 @@ namespace LinBox {
                 CRAField field(radices[j]);
                 craBuilder.progress(field, padicAccumulations[j]);
             }
-            commentator().stop("[MultiModLifting] CRT Reconstruction");
+            commentator().stop("[MultiModLifting] CRT Reconstruction Progress");
 
             // Rational reconstruction
             // @note RR expects the bounds to be strict, this is why we add a + 1
-            commentator().start("[MultiModLifting] Rational Reconstruction");
             craBuilder.result(xNum, xDen, _lc.numBound() + 1);
-            commentator().stop("[MultiModLifting] Rational Reconstruction");
 
             return true;
         }
@@ -165,7 +163,7 @@ namespace LinBox {
         // implicitly requiring 0-{p-1} representation of the p-adic sequence elements.
         using Field = Givaro::Modular<double>;
         using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
-        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()), 12); // @fixme REMOVE SEED
+        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
 
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index 39bf409a0..a30783798 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -95,6 +95,8 @@ namespace {
 template <class SolveMethod, class Matrix, class Vector, class ResultMatrix, class ResultVector>
 bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb)
 {
+    std::cout << "Checking result..." << std::endl;
+
     ResultVector RAx(RA.field(), Rb.size());
     RA.apply(RAx, x);
 
@@ -104,6 +106,8 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul
         return false;
     }
 
+    std::cout << "Result OK !" << std::endl;
+
     return true;
 }
 
@@ -291,7 +295,7 @@ int main(int argc, char** argv)
         // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 
         // // ----- Rational Dixon
-        // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // // @fixme Dixon<Wiedemann> does not compile
         // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);

From 430278f8bdc0e6f552b77d1215b9be0375054fc3 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 21 Jun 2019 15:43:21 +0200
Subject: [PATCH 35/63] Speed up thanks to fconvert on matrix

---
 .../algorithms/multi-mod-lifting-container.h  | 44 ++++++++-----------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index ca2649df5..2baff2fae 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -93,6 +93,8 @@ namespace LinBox {
             , _A(A)
             , _b(b)
             , _n(A.rowdim())
+            , _rMatrix(_ring)
+            , _qMatrix(_ring)
         {
             linbox_check(A.rowdim() == A.coldim());
 
@@ -228,22 +230,23 @@ namespace LinBox {
 
             //----- Locals setup
 
-            _r.reserve(_primesCount);
-            _Q.reserve(_primesCount);
+            _rMatrix = IMatrix(_ring, _n, _primesCount);
+            _qMatrix = IMatrix(_ring, _n, _primesCount);
+
             _R.reserve(_primesCount);
             _Fc.reserve(_primesCount);
             _FR.reserve(_primesCount);
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto& F = _fields[j];
 
-                _r.emplace_back(_ring, _n);
-                _Q.emplace_back(_ring, _n);
                 _R.emplace_back(_ring, _n);
                 _Fc.emplace_back(F, _n);
                 _FR.emplace_back(F, _n);
 
                 // Initialize all residues to b
-                _r.back() = _b; // Copying data
+                for (auto i = 0u; i < _n; ++i) {
+                    _rMatrix.refEntry(i, j) = _b[i];
+                }
             }
         }
 
@@ -292,19 +295,18 @@ namespace LinBox {
         bool next(std::vector<IVector>& digits)
         {
             VectorDomain<Ring> IVD(_ring);
+            BlasMatrixDomain<Ring> IMD(_ring);
 
             // commentator().start("[MultiModLifting] Computing c");
             #pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
-                auto& r = _r[j];
-                auto& Q = _Q[j];
                 auto& R = _R[j];
 
                 // @note There is no VectorDomain::divmod yet.
                 // Euclidian division so that rj = pj Qj + Rj
                 for (auto i = 0u; i < _n; ++i) {
-                    _ring.quoRem(Q[i], R[i], r[i], pj);
+                    _ring.quoRem(_qMatrix.refEntry(i, j), R[i], _rMatrix.getEntry(i, j), pj);
                 }
 
                 // Convert R to the field
@@ -336,13 +338,14 @@ namespace LinBox {
             // r <= Q + (R - A c) / p
 
             // By first computing R <= R - A c as a fgemm within the RNS domain.
+            // commentator().start("[MultiModLifting] FGEMM R <= R - Ac");
             PAR_BLOCK
             {
                 using RNSParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::RNSModulus, FFLAS::StrategyParameter::Threads>;
                 using FGEMMSequential = FFLAS::ParSeqHelper::Sequential;
                 using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RNSParallel, FGEMMSequential>;
                 using MMHelper = FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;
-                ComposedParSeqHelper composedParSeqHelper(4, 4);
+                ComposedParSeqHelper composedParSeqHelper(4, 4); // @fixme REPLACE THESE 444!
                 MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);
 
                 FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,
@@ -368,20 +371,10 @@ namespace LinBox {
             // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
 
             // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
-            #pragma omp parallel for
-            for (auto j = 0u; j < _primesCount; ++j) {
-                auto& r = _r[j];
-                auto& Q = _Q[j];
-
-                // r <- (R - Ac) / p
-                // @fixme @cpernet Don't know how to do that with one fconvert_rns!
-                for (auto i = 0u; i < _n; ++i) {
-                    FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1,
-                                        _rnsR + (i * _primesCount + j));
-                }
-
-                IVD.addin(r, Q);
-            }
+            // @fixme @cpernet Is this parallel?
+            FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount,
+                                _rnsR + 0);
+            IMD.addin(_rMatrix, _qMatrix);
             // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
 
             return true;
@@ -442,11 +435,10 @@ namespace LinBox {
         std::vector<Field> _fields; // All fields Modular<p[i]>
 
         //----- Iteration
-        std::vector<IVector> _r; // @todo Could be a matrix? Might not be useful, as it is never
-                                 // used directly in computations.
-        std::vector<IVector> _Q;
         std::vector<IVector> _R; // Will be inited to RNS within _rnsR
         std::vector<FVector> _Fc;
         std::vector<FVector> _FR;
+        IMatrix _rMatrix;
+        IMatrix _qMatrix;
     };
 }

From eb7c3dd012e18a0fcef62f7eeb1663d4a54edee8 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 21 Jun 2019 16:44:40 +0200
Subject: [PATCH 36/63] Working on INV mul

---
 .../dixon-solver/dixon-solver-dense.inl       |  4 +++
 .../algorithms/multi-mod-lifting-container.h  | 35 +++++++++++++------
 2 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
index f1ab9266e..ed0978683 100644
--- a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
+++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
@@ -129,12 +129,14 @@ namespace LinBox {
         } while (notfr);
 
         typedef DixonLiftingContainer<Ring, Field, IMatrix, BlasMatrix<Field>> LiftingContainer;
+        commentator().start("CLASSIC DIXON LIFTING");
         LiftingContainer lc(_ring, *F, A, *FMP, b, _prime);
         RationalReconstruction<LiftingContainer> re(lc);
         if (!re.getRational(num, den, 0)) {
             delete FMP;
             return SS_FAILED;
         }
+        commentator().stop("CLASSIC DIXON LIFTING");
 #ifdef RSTIMING
         ttNonsingularSolve.update(re, lc);
 #endif
@@ -703,6 +705,7 @@ namespace LinBox {
             // ----- Do lifting on sub matrix
 
             BlasMatrix<Ring> BBA_minor(A_minor);
+            commentator().start("CLASSIC DIXON LIFTING");
             LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime);
 
             // ----- Reconstruct rational
@@ -713,6 +716,7 @@ namespace LinBox {
                 // dirty, but should not be called
                 return SS_FAILED;
             }
+            commentator().stop("CLASSIC DIXON LIFTING");
 
 #ifdef RSTIMING
             ttSystemSolve.update(re, lc);
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 2baff2fae..4fbf7c864 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -190,6 +190,7 @@ namespace LinBox {
                 _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n);
                 _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
                 _rnsR = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount);
+                _rnsPrimesInverses = FFLAS::fflas_new(*_rnsDomain, _primesCount);
 
                 // @note So that 2^(16*cmax) is the max element of A.
                 double cmax = logInfinityNormA / 16.;
@@ -202,11 +203,17 @@ namespace LinBox {
                 _primesRNSInverses.resize(_primesCount);
                 for (auto j = 0u; j < _primesCount; ++j) {
                     auto prime = _primes[j];
-                    _primesRNSInverses[j].resize(_rnsPrimesCount);
+
+                    auto& rnsPrimeInverse = _rnsPrimesInverses[j];
+                    auto stride = rnsPrimeInverse._stride;
+
+                    _primesRNSInverses[j].resize(_rnsPrimesCount); // @fixme TBR
+
                     for (auto h = 0u; h < _rnsPrimesCount; ++h) {
                         auto& rnsF = _rnsSystem->_field_rns[h];
                         auto& primeInverse = _primesRNSInverses[j][h];
                         rnsF.inv(primeInverse, prime);
+                        rnsPrimeInverse._ptr[h * stride] = primeInverse;
                     }
                 }
             }
@@ -357,25 +364,30 @@ namespace LinBox {
             // We divide each residues by the according pj, which is done by multiplying.
             // @fixme Could be done in parallel!
             // @fixme @cpernet Don't know why, can't make it parallel!
-            // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
-            for (auto j = 0u; j < _primesCount; ++j) {
-                for (auto i = 0u; i < _n; ++i) {
-                    auto& rnsElement = _rnsR[i * _primesCount + j];
-                    auto stride = rnsElement._stride;
+            commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
+            for (auto i = 0u; i < _n; ++i) {
+                for (auto j = 0u; j < _primesCount; ++j) {
+                    auto& rnsPrimeInverse = _rnsPrimesInverses[j];
+                    auto& rnsR = _rnsR[i * _primesCount + j];
+
+                    // @fixme @cpernet Just doing _rnsDomain->mulin(rnsR, _rnsPrimesInverses[j]);
+                    // But mulin doesn't exist on that domain, and fgemm on 1x1 is much slower
+                    auto stridePrimeInverse = rnsPrimeInverse._stride;
+                    auto strideR = rnsR._stride;
                     for (auto h = 0u; h < _rnsPrimesCount; ++h) {
                         auto& rnsF = _rnsSystem->_field_rns[h];
-                        rnsF.mulin(rnsElement._ptr[h * stride], _primesRNSInverses[j][h]);
+                        rnsF.mulin(rnsR._ptr[h * strideR], rnsPrimeInverse._ptr[h * stridePrimeInverse]);
                     }
                 }
             }
-            // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
+            commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
 
-            // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
+            commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
             // @fixme @cpernet Is this parallel?
             FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount,
                                 _rnsR + 0);
             IMD.addin(_rMatrix, _qMatrix);
-            // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
+            commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
 
             return true;
         }
@@ -423,8 +435,9 @@ namespace LinBox {
         size_t _rnsPrimesCount = 0u;
         // Stores the inverse of pj of the i-th RNS prime into _primesRNSInverses[j][i]
         std::vector<std::vector<FElement>> _primesRNSInverses;
+        RNSElementPtr _rnsPrimesInverses;
 
-        std::vector<FElement> _primes; // @fixme We might want something else as a type!
+        std::vector<double> _primes;
         std::vector<double> _rnsPrimes;
         // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound).
         size_t _iterationsCount = 0u;

From 7957f03616194e20e3d3539f02fe7dd17d28f3e0 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 26 Jun 2019 17:00:02 +0200
Subject: [PATCH 37/63] Parallel init for MultiModLiftingContainer

---
 .../algorithms/multi-mod-lifting-container.h  | 33 ++++++++++---------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 4fbf7c864..bfe2e2382 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -167,21 +167,27 @@ namespace LinBox {
             // and pass through to the lifting container. Here, we could use that, but we have
             // to keep control of generated primes, so that the RNS base has bigger primes
             // than the .
+            commentator().start("[MMLifting][Init] A^{-1} mod pj precomputations");
             {
                 _B.reserve(_primesCount);
+                for (auto& F : _fields) {
+                    _B.emplace_back(A, F);
+                }
 
-                for (const auto& F : _fields) {
-                    _B.emplace_back(A, F); // Rebind into the field
-
+                // @fixme To be replaced with Paladin
+                #pragma omp parallel for
+                for (auto j = 0u; j < _primesCount; ++j) {
                     int nullity = 0;
+                    auto& F = _fields[j];
                     BlasMatrixDomain<Field> bmd(F);
-                    bmd.invin(_B.back(), nullity);
+                    bmd.invin(_B[j], nullity);
                     if (nullity > 0) {
                         // @fixme Should redraw another prime!
                         throw LinBoxError("Wrong prime, sorry.");
                     }
                 }
             }
+            commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations");
 
             // Making A into the RNS domain
             {
@@ -200,20 +206,16 @@ namespace LinBox {
 
             // Compute the inverses of pj for each RNS prime
             {
-                _primesRNSInverses.resize(_primesCount);
                 for (auto j = 0u; j < _primesCount; ++j) {
                     auto prime = _primes[j];
 
                     auto& rnsPrimeInverse = _rnsPrimesInverses[j];
                     auto stride = rnsPrimeInverse._stride;
 
-                    _primesRNSInverses[j].resize(_rnsPrimesCount); // @fixme TBR
-
                     for (auto h = 0u; h < _rnsPrimesCount; ++h) {
                         auto& rnsF = _rnsSystem->_field_rns[h];
-                        auto& primeInverse = _primesRNSInverses[j][h];
+                        auto& primeInverse = rnsPrimeInverse._ptr[h * stride];
                         rnsF.inv(primeInverse, prime);
-                        rnsPrimeInverse._ptr[h * stride] = primeInverse;
                     }
                 }
             }
@@ -352,7 +354,7 @@ namespace LinBox {
                 using FGEMMSequential = FFLAS::ParSeqHelper::Sequential;
                 using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RNSParallel, FGEMMSequential>;
                 using MMHelper = FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;
-                ComposedParSeqHelper composedParSeqHelper(4, 4); // @fixme REPLACE THESE 444!
+                ComposedParSeqHelper composedParSeqHelper(_primes.size(), _primes.size());
                 MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);
 
                 FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,
@@ -364,7 +366,7 @@ namespace LinBox {
             // We divide each residues by the according pj, which is done by multiplying.
             // @fixme Could be done in parallel!
             // @fixme @cpernet Don't know why, can't make it parallel!
-            commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
+            // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
             for (auto i = 0u; i < _n; ++i) {
                 for (auto j = 0u; j < _primesCount; ++j) {
                     auto& rnsPrimeInverse = _rnsPrimesInverses[j];
@@ -380,14 +382,14 @@ namespace LinBox {
                     }
                 }
             }
-            commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
+            // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
 
-            commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
+            // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
             // @fixme @cpernet Is this parallel?
             FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount,
                                 _rnsR + 0);
             IMD.addin(_rMatrix, _qMatrix);
-            commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
+            // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
 
             return true;
         }
@@ -433,8 +435,7 @@ namespace LinBox {
         RNSElementPtr _rnsc;
         RNSElementPtr _rnsR;
         size_t _rnsPrimesCount = 0u;
-        // Stores the inverse of pj of the i-th RNS prime into _primesRNSInverses[j][i]
-        std::vector<std::vector<FElement>> _primesRNSInverses;
+        // Stores the inverse of pj within the RNS base prime into _rnsPrimesInverses[j]
         RNSElementPtr _rnsPrimesInverses;
 
         std::vector<double> _primes;

From 161eb3f05e6f80901a0ef944ce1bfb619aca5f85 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 26 Jun 2019 17:22:00 +0200
Subject: [PATCH 38/63] Added move assignment operator to blas-vector.

---
 .../algorithms/multi-mod-lifting-container.h   |  3 ++-
 linbox/vector/blas-vector.h                    | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index bfe2e2382..6e56fccb1 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -324,7 +324,8 @@ namespace LinBox {
                 auto& digit = digits[j];
                 auto& B = _B[j];
                 auto& Fc = _Fc[j];
-                // @fixme Am I copying the data an extra time?
+
+                // @note The assignment will call the move one, not copying data twice.
                 FR = FVector(F, R); // rebind
                 B.apply(Fc, FR);
 
diff --git a/linbox/vector/blas-vector.h b/linbox/vector/blas-vector.h
index 71faff05c..1a6807850 100644
--- a/linbox/vector/blas-vector.h
+++ b/linbox/vector/blas-vector.h
@@ -320,6 +320,24 @@ namespace LinBox { /* BlasVector */
 			return *this;
 		}
 
+
+		BlasVector<_Field,_blasRep>& operator= (BlasVector<_Field,_blasRep>&& V)
+		{
+			if ( &V == this)
+				return *this;
+
+			_size = V._size;
+			_1stride = V._1stride;
+			_rep = std::move(V._rep);
+			_ptr = _rep.data();
+			_field = V._field;
+
+			// Father_t is garbage until then:
+			setIterators();
+
+			return *this;
+		}
+
 		//! this should not exist.
 		BlasVector<_Field,_blasRep>& operator= (const std::vector<Element>& V)
 		{

From cd14524cea889f39ea712e7ed2df407c0b890901 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 26 Jun 2019 18:04:11 +0200
Subject: [PATCH 39/63] Lifting container now returns a vector of field vectors
 instead of integer ones.

---
 .../algorithms/multi-mod-lifting-container.h  | 24 +++++++------------
 linbox/solutions/solve/solve-dixon-rns.h      | 22 +++++++++++------
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 6e56fccb1..4d5d160d5 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -243,13 +243,11 @@ namespace LinBox {
             _qMatrix = IMatrix(_ring, _n, _primesCount);
 
             _R.reserve(_primesCount);
-            _Fc.reserve(_primesCount);
             _FR.reserve(_primesCount);
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto& F = _fields[j];
 
                 _R.emplace_back(_ring, _n);
-                _Fc.emplace_back(F, _n);
                 _FR.emplace_back(F, _n);
 
                 // Initialize all residues to b
@@ -291,8 +289,8 @@ namespace LinBox {
         Integer denBound() const { return _denBound; }
 
         uint32_t primesCount() const { return _primesCount; }
-
         const FElement& prime(uint32_t index) const { return _primes.at(index); }
+        const std::vector<Field>& primesFields() const { return _fields; }
 
         // --------------
         // ----- Iterator
@@ -301,12 +299,12 @@ namespace LinBox {
          * Returns false if the next digit cannot be computed (bad modulus).
          * c is a vector of integers but all element are below p = p1 * ... * pl
          */
-        bool next(std::vector<IVector>& digits)
+        bool next(std::vector<FVector>& digits)
         {
             VectorDomain<Ring> IVD(_ring);
             BlasMatrixDomain<Ring> IMD(_ring);
 
-            // commentator().start("[MultiModLifting] Computing c");
+            commentator().start("[MultiModLifting] c = A^{-1} r mod p");
             #pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
@@ -314,6 +312,8 @@ namespace LinBox {
 
                 // @note There is no VectorDomain::divmod yet.
                 // Euclidian division so that rj = pj Qj + Rj
+                // @fixme Should use quoRem on unsigned int, making R an uint vector,
+                // because it will be converted anyway.
                 for (auto i = 0u; i < _n; ++i) {
                     _ring.quoRem(_qMatrix.refEntry(i, j), R[i], _rMatrix.getEntry(i, j), pj);
                 }
@@ -323,25 +323,20 @@ namespace LinBox {
                 auto& FR = _FR[j];
                 auto& digit = digits[j];
                 auto& B = _B[j];
-                auto& Fc = _Fc[j];
 
                 // @note The assignment will call the move one, not copying data twice.
                 FR = FVector(F, R); // rebind
-                B.apply(Fc, FR);
-
-                // @fixme We might not need to store digits into IVectors, and returning _Fc
-                // would do the trick
-                digit = IVector(_ring, Fc);
+                B.apply(digit, FR);
 
                 // Store the very same result in an RNS system,
                 // but fact is all the primes of the RNS system are bigger
-                // than the modulus used to compute _Fc, we just copy the result for everybody.
+                // than the modulus used to compute the digit, we just copy the result for everybody.
                 for (auto i = 0u; i < _n; ++i) {
                     setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
-                    setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]);
+                    setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
                 }
             }
-            // commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
+            commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
 
             // ----- Compute the next residues!
 
@@ -451,7 +446,6 @@ namespace LinBox {
 
         //----- Iteration
         std::vector<IVector> _R; // Will be inited to RNS within _rnsR
-        std::vector<FVector> _Fc;
         std::vector<FVector> _FR;
         IMatrix _rMatrix;
         IMatrix _qMatrix;
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 08fa84355..2ae9b3aa9 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -37,6 +37,8 @@ namespace LinBox {
         using Ring = typename LiftingContainer::Ring;
         using IElement = typename LiftingContainer::IElement;
         using IVector = typename LiftingContainer::IVector;
+        using FElement = typename LiftingContainer::FElement;
+        using FVector = typename LiftingContainer::FVector;
 
     public:
         MultiModRationalReconstruction(LiftingContainer& lc)
@@ -56,26 +58,32 @@ namespace LinBox {
             }
 
             commentator().start("[MultiModLifting] Lifting");
-            VectorDomain<Ring> IVD(_lc.ring());
 
-            // Stores each c0 + c1 pj + ... + ck pj^k for each pj
-            std::vector<IVector> padicAccumulations(_lc.primesCount(), _lc.ring());
             // Temporary structure to store a ci for each pj
-            std::vector<IVector> digits(_lc.primesCount(),
-                                        _lc.ring()); // @fixme Could be a Field Element?
+            std::vector<FVector> digits;
+            digits.reserve(_lc.primesCount());
+            for (auto& F : _lc.primesFields()) {
+                digits.emplace_back(F, _lc.size());
+            }
+
             // The pj^i for each pj
             std::vector<IElement> radices(_lc.primesCount(), 1);
 
+            // Stores each c0 + c1 pj + ... + ck pj^k for each pj
+            std::vector<IVector> padicAccumulations(_lc.primesCount(), _lc.ring());
             for (auto j = 0u; j < _lc.primesCount(); ++j) {
                 padicAccumulations[j].resize(_lc.size());
-                digits[j].resize(_lc.size());
             }
 
+            // @fixme Better use PolEval (or will it cause memory explosion?)
+            VectorDomain<Ring> IVD(_lc.ring());
             for (auto i = 0u; i < _lc.length(); ++i) {
                 _lc.next(digits);
 
-                // @fixme Better use PolEval (except memory explosion?)
                 for (auto j = 0u; j < _lc.primesCount(); ++j) {
+                    // @fixme @cpernet digits being a field vector, this will implicitly cast
+                    // each of its elements to a Integer, is there something better?
+                    // Or else, we just need an overload of Givaro::ZRing().axpyin() with a double as last parameter
                     IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
                     _lc.ring().mulin(radices[j], _lc.prime(j));
                 }

From 50d12c368bd79411197486704d32bd760ceae496 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 28 Jun 2019 10:42:29 +0200
Subject: [PATCH 40/63] Parallelized the padic accumulation

---
 linbox/algorithms/multi-mod-lifting-container.h | 8 ++++----
 linbox/solutions/solve/solve-dixon-rns.h        | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 4d5d160d5..97708fdc1 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -167,7 +167,7 @@ namespace LinBox {
             // and pass through to the lifting container. Here, we could use that, but we have
             // to keep control of generated primes, so that the RNS base has bigger primes
             // than the .
-            commentator().start("[MMLifting][Init] A^{-1} mod pj precomputations");
+            // commentator().start("[MMLifting][Init] A^{-1} mod pj precomputations");
             {
                 _B.reserve(_primesCount);
                 for (auto& F : _fields) {
@@ -187,7 +187,7 @@ namespace LinBox {
                     }
                 }
             }
-            commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations");
+            // commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations");
 
             // Making A into the RNS domain
             {
@@ -304,7 +304,7 @@ namespace LinBox {
             VectorDomain<Ring> IVD(_ring);
             BlasMatrixDomain<Ring> IMD(_ring);
 
-            commentator().start("[MultiModLifting] c = A^{-1} r mod p");
+            // commentator().start("[MultiModLifting] c = A^{-1} r mod p");
             #pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
@@ -336,7 +336,7 @@ namespace LinBox {
                     setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
                 }
             }
-            commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
+            // commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
 
             // ----- Compute the next residues!
 
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 2ae9b3aa9..388fb5e78 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -80,6 +80,7 @@ namespace LinBox {
             for (auto i = 0u; i < _lc.length(); ++i) {
                 _lc.next(digits);
 
+                #pragma omp parallel for
                 for (auto j = 0u; j < _lc.primesCount(); ++j) {
                     // @fixme @cpernet digits being a field vector, this will implicitly cast
                     // each of its elements to a Integer, is there something better?

From 9331b2db7ec84dca63feb9608cae53d9e378a1a5 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 28 Jun 2019 10:51:56 +0200
Subject: [PATCH 41/63] Using correct NUM_THREADS for fgemm

---
 linbox/algorithms/multi-mod-lifting-container.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 97708fdc1..3da315da2 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -350,7 +350,7 @@ namespace LinBox {
                 using FGEMMSequential = FFLAS::ParSeqHelper::Sequential;
                 using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RNSParallel, FGEMMSequential>;
                 using MMHelper = FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;
-                ComposedParSeqHelper composedParSeqHelper(_primes.size(), _primes.size());
+                ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS);
                 MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);
 
                 FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,

From 2764b64ffbbccaeea59ce4ea76e041de91804d96 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 28 Jun 2019 14:20:35 +0200
Subject: [PATCH 42/63] Computing the division by pj is now parallel and cache-friendly.

---
 .../algorithms/multi-mod-lifting-container.h  | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 3da315da2..f081b20b8 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -360,28 +360,30 @@ namespace LinBox {
             // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
 
             // We divide each residues by the according pj, which is done by multiplying.
-            // @fixme Could be done in parallel!
-            // @fixme @cpernet Don't know why, can't make it parallel!
+            // @note The matrix _rnsR is RNS-major, meaning that it is stored
+            // as [R mod q0][R mod q1][...] where [R mod qh] represents a full matrix.
+            // We use this fact to keep better cache coherency.
             // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p");
-            for (auto i = 0u; i < _n; ++i) {
+            auto rnsStride = 0u;
+            for (auto h = 0u; h < _rnsPrimesCount; ++h) {
+                auto& rnsF = _rnsSystem->_field_rns[h];
+
+                #pragma omp parallel for
                 for (auto j = 0u; j < _primesCount; ++j) {
                     auto& rnsPrimeInverse = _rnsPrimesInverses[j];
-                    auto& rnsR = _rnsR[i * _primesCount + j];
-
-                    // @fixme @cpernet Just doing _rnsDomain->mulin(rnsR, _rnsPrimesInverses[j]);
-                    // But mulin doesn't exist on that domain, and fgemm on 1x1 is much slower
                     auto stridePrimeInverse = rnsPrimeInverse._stride;
-                    auto strideR = rnsR._stride;
-                    for (auto h = 0u; h < _rnsPrimesCount; ++h) {
-                        auto& rnsF = _rnsSystem->_field_rns[h];
-                        rnsF.mulin(rnsR._ptr[h * strideR], rnsPrimeInverse._ptr[h * stridePrimeInverse]);
+                    auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse];
+
+                    for (auto i = 0u; i < _n; ++i) {
+                        rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], rnsPrimeInverseForRnsPrimeH);
                     }
                 }
+
+                rnsStride += _rnsR._stride;
             }
             // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
 
             // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
-            // @fixme @cpernet Is this parallel?
             FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount,
                                 _rnsR + 0);
             IMD.addin(_rMatrix, _qMatrix);

From e126397227683f8f02c9161734273b753fbbd9f5 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 28 Jun 2019 16:09:55 +0200
Subject: [PATCH 43/63] Now computing the division on uint.

---
 .../algorithms/multi-mod-lifting-container.h  | 22 ++++++++-----------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index f081b20b8..72015389c 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -242,12 +242,10 @@ namespace LinBox {
             _rMatrix = IMatrix(_ring, _n, _primesCount);
             _qMatrix = IMatrix(_ring, _n, _primesCount);
 
-            _R.reserve(_primesCount);
             _FR.reserve(_primesCount);
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto& F = _fields[j];
 
-                _R.emplace_back(_ring, _n);
                 _FR.emplace_back(F, _n);
 
                 // Initialize all residues to b
@@ -308,24 +306,22 @@ namespace LinBox {
             #pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
-                auto& R = _R[j];
+                auto& FR = _FR[j];
+                uint64_t upj = pj;
 
                 // @note There is no VectorDomain::divmod yet.
                 // Euclidian division so that rj = pj Qj + Rj
-                // @fixme Should use quoRem on unsigned int, making R an uint vector,
-                // because it will be converted anyway.
+                uint64_t uR;
                 for (auto i = 0u; i < _n; ++i) {
-                    _ring.quoRem(_qMatrix.refEntry(i, j), R[i], _rMatrix.getEntry(i, j), pj);
+                    Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj);
+                    // @note No need to init, because we know that uR < pj,
+                    // so that would do an extra check.
+                    FR[i] = static_cast<FElement>(uR);
                 }
 
-                // Convert R to the field
-                auto& F = _fields[j];
-                auto& FR = _FR[j];
+                // digit = A^{-1} * R mod pj
                 auto& digit = digits[j];
                 auto& B = _B[j];
-
-                // @note The assignment will call the move one, not copying data twice.
-                FR = FVector(F, R); // rebind
                 B.apply(digit, FR);
 
                 // Store the very same result in an RNS system,
@@ -447,7 +443,7 @@ namespace LinBox {
         std::vector<Field> _fields; // All fields Modular<p[i]>
 
         //----- Iteration
-        std::vector<IVector> _R; // Will be inited to RNS within _rnsR
+
         std::vector<FVector> _FR;
         IMatrix _rMatrix;
         IMatrix _qMatrix;

From a4a479d6ae6bf0a986df63de49f68cb649003baa Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Mon, 1 Jul 2019 14:57:16 +0200
Subject: [PATCH 44/63] Working on benchmarks

---
 benchmarks/benchmark-dense-solve.C            | 40 ++++++++----
 .../algorithms/multi-mod-lifting-container.h  | 64 +++++++++++++------
 linbox/solutions/methods.h                    | 14 +++-
 tests/test-solve-full.C                       | 29 +++++++--
 4 files changed, 105 insertions(+), 42 deletions(-)

diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C
index a678c52f0..887a73667 100644
--- a/benchmarks/benchmark-dense-solve.C
+++ b/benchmarks/benchmark-dense-solve.C
@@ -55,6 +55,7 @@ namespace {
         int bits = 10;
         std::string dispatchString = "Auto";
         std::string methodString = "Auto";
+        std::string rnsFgemmString = "ParallelRnsOnly";
     };
 
     template <typename Vector>
@@ -71,10 +72,10 @@ namespace {
 }
 
 template <typename Field, typename Vector = DenseVector<Field>>
-void benchmark(std::pair<double, double>& timebits, Arguments& args, MethodBase& method)
+void benchmark(std::array<double, 3>& timebits, Arguments& args, MethodBase& method)
 {
-    Field F(args.q);                                 // q is ignored for Integers
-    typename Field::RandIter randIter(F, args.bits); // bits is ignored for ModularRandIter
+    Field F(args.q);                                    // q is ignored for Integers
+    typename Field::RandIter randIter(F, 0, args.bits); // bits is ignored for ModularRandIter
 
 #ifdef _BENCHMARKS_DEBUG_
     std::clog << "Setting A ... " << std::endl;
@@ -128,12 +129,9 @@ void benchmark(std::pair<double, double>& timebits, Arguments& args, MethodBase&
     if (method.master()) {
         chrono.stop();
 
-#ifdef _BENCHMARKS_DEBUG_
-        printVector(std::clog << "(DenseElimination) Solution is ", F, X) << std::endl;
-#endif
-
-        setBitsize(timebits.second, args.q, X);
-        timebits.first = chrono.usertime();
+        timebits[0] = chrono.usertime();
+        timebits[1] = chrono.realtime();
+        setBitsize(timebits[2], args.q, X);
     }
 }
 
@@ -145,14 +143,17 @@ int main(int argc, char** argv)
                      {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n},
                      {'b', "-b", "bit size", TYPE_INT, &args.bits},
                      {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString},
+                     {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString},
                      {'M', "-M",
                       "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, "
-                      "Dixon, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, "
+                      "Dixon, DixonRNS, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, "
                       "Blackbox, Wiedemann, Lanczos).",
                       TYPE_STR, &args.methodString},
                      END_OF_ARGUMENTS};
     LinBox::parseArguments(argc, argv, as);
 
+    commentator().setReportStream(std::cout);
+
     // Setting up context
 
     Communicator communicator(&argc, &argv);
@@ -167,12 +168,21 @@ int main(int argc, char** argv)
     else if (args.dispatchString == "Distributed")  method.dispatch = Dispatch::Distributed;
     else                                            method.dispatch = Dispatch::Auto;
 
+    if (args.rnsFgemmString == "BothParallel")              method.rnsFgemmType = RnsFgemmType::BothParallel;
+    else if (args.rnsFgemmString == "BothSequential")       method.rnsFgemmType = RnsFgemmType::BothSequential;
+    else if (args.rnsFgemmString == "ParallelRnsOnly")      method.rnsFgemmType = RnsFgemmType::ParallelRnsOnly;
+    else if (args.rnsFgemmString == "ParallelFgemmOnly")    method.rnsFgemmType = RnsFgemmType::ParallelFgemmOnly;
+    else {
+        std::cerr << "-r RNS-FGEMM type should be either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly" << std::endl;
+        return EXIT_FAILURE;
+    }
+
     // Real benchmark
 
     bool isModular = false;
     if (args.q > 0) isModular = true;
 
-    using Timing = std::pair<double, double>;
+    using Timing = std::array<double, 3>;
     std::vector<Timing> timebits(args.nbiter);
     for (int iter = 0; iter < args.nbiter; ++iter) {
         if (isModular) {
@@ -185,13 +195,15 @@ int main(int argc, char** argv)
     }
 
 #ifdef _BENCHMARKS_DEBUG_
-    for (const auto& it : timebits) std::clog << it.first << "s, " << it.second << " bits" << std::endl;
+    for (const auto& it : timebits) std::clog << it[0] << "s, " << it[2] << " bits" << std::endl;
 #endif
 
     if (method.master()) {
-        std::sort(timebits.begin(), timebits.end(), [](const Timing& a, const Timing& b) -> bool { return a.first > b.first; });
+        std::sort(timebits.begin(), timebits.end(), [](const Timing& a, const Timing& b) -> bool { return a[0] > b[0]; });
 
-        std::cout << "Time: " << timebits[args.nbiter / 2].first << " Bitsize: " << timebits[args.nbiter / 2].second;
+        std::cout << "UserTime: " << timebits[args.nbiter / 2][0];
+        std::cout << " RealTime: " << timebits[args.nbiter / 2][1];
+        std::cout << " Bitsize: " << timebits[args.nbiter / 2][2];
 
         FFLAS::writeCommandString(std::cout, as) << std::endl;
     }
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 72015389c..d5a7b7a73 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -90,6 +90,7 @@ namespace LinBox {
         MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A,
                                  const IVector& b, const Method::DixonRNS& m)
             : _ring(ring)
+            , _method(m)
             , _A(A)
             , _b(b)
             , _n(A.rowdim())
@@ -174,8 +175,8 @@ namespace LinBox {
                     _B.emplace_back(A, F);
                 }
 
-                // @fixme To be replaced with Paladin
-                #pragma omp parallel for
+// @fixme To be replaced with Paladin
+#pragma omp parallel for
                 for (auto j = 0u; j < _primesCount; ++j) {
                     int nullity = 0;
                     auto& F = _fields[j];
@@ -302,8 +303,8 @@ namespace LinBox {
             VectorDomain<Ring> IVD(_ring);
             BlasMatrixDomain<Ring> IMD(_ring);
 
-            // commentator().start("[MultiModLifting] c = A^{-1} r mod p");
-            #pragma omp parallel for
+// commentator().start("[MultiModLifting] c = A^{-1} r mod p");
+#pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
                 auto& FR = _FR[j];
@@ -326,7 +327,8 @@ namespace LinBox {
 
                 // Store the very same result in an RNS system,
                 // but fact is all the primes of the RNS system are bigger
-                // than the modulus used to compute the digit, we just copy the result for everybody.
+                // than the modulus used to compute the digit, we just copy the result for
+                // everybody.
                 for (auto i = 0u; i < _n; ++i) {
                     setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
                     setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
@@ -338,20 +340,40 @@ namespace LinBox {
 
             // r <= Q + (R - A c) / p
 
-            // By first computing R <= R - A c as a fgemm within the RNS domain.
+#define rns_fgemm(RnsParSeq, FgemmParSeq)                                                          \
+    using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RnsParSeq, FgemmParSeq>;             \
+    using MMHelper = FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic,                      \
+                                     FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;     \
+    ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS);                           \
+    MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);                                      \
+                                                                                                   \
+    FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n,      \
+                 _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, _rnsR,         \
+                 _primesCount, mmHelper);
+
+            using RNSParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::RNSModulus,
+                                                              FFLAS::StrategyParameter::Threads>;
+            using FGEMMParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,
+                                                                FFLAS::StrategyParameter::Threads>;
+
+            // @fixme @cpernet @jgdumas Should we move that PAR_BLOCK outside of the function
+            // and let the user do it?
             // commentator().start("[MultiModLifting] FGEMM R <= R - Ac");
             PAR_BLOCK
             {
-                using RNSParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::RNSModulus, FFLAS::StrategyParameter::Threads>;
-                using FGEMMSequential = FFLAS::ParSeqHelper::Sequential;
-                using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RNSParallel, FGEMMSequential>;
-                using MMHelper = FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;
-                ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS);
-                MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);
-
-                FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount,
-                            _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one,
-                            _rnsR, _primesCount, mmHelper);
+                // Firstly compute R <= R - A c as a fgemm within the RNS domain.
+                if (_method.rnsFgemmType == RnsFgemmType::BothSequential) {
+                    rns_fgemm(FFLAS::ParSeqHelper::Sequential, FFLAS::ParSeqHelper::Sequential)
+                }
+                else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) {
+                    rns_fgemm(RNSParallel, FGEMMParallel)
+                }
+                else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) {
+                    rns_fgemm(FFLAS::ParSeqHelper::Sequential, FGEMMParallel)
+                }
+                else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) {
+                    rns_fgemm(RNSParallel, FFLAS::ParSeqHelper::Sequential)
+                }
             }
             // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
 
@@ -364,14 +386,15 @@ namespace LinBox {
             for (auto h = 0u; h < _rnsPrimesCount; ++h) {
                 auto& rnsF = _rnsSystem->_field_rns[h];
 
-                #pragma omp parallel for
+#pragma omp parallel for
                 for (auto j = 0u; j < _primesCount; ++j) {
                     auto& rnsPrimeInverse = _rnsPrimesInverses[j];
                     auto stridePrimeInverse = rnsPrimeInverse._stride;
                     auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse];
 
                     for (auto i = 0u; i < _n; ++i) {
-                        rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], rnsPrimeInverseForRnsPrimeH);
+                        rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)],
+                                   rnsPrimeInverseForRnsPrimeH);
                     }
                 }
 
@@ -380,8 +403,8 @@ namespace LinBox {
             // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
 
             // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
-            FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount,
-                                _rnsR + 0);
+            FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(),
+                                _primesCount, _rnsR + 0);
             IMD.addin(_rMatrix, _qMatrix);
             // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R");
 
@@ -413,6 +436,7 @@ namespace LinBox {
 
     public: // @fixme BACK TO PRIVATE!
         const Ring& _ring;
+        Method::DixonRNS _method; // A copy of the user-provided method.
 
         // The problem: A^{-1} * b
         const IMatrix& _A;
diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h
index fb73844a1..5222050b6 100644
--- a/linbox/solutions/methods.h
+++ b/linbox/solutions/methods.h
@@ -178,6 +178,17 @@ namespace LinBox {
         Linear,
     };
 
+    /**
+     * When running FFLAS's fgemm on an RNS structure,
+     * how the composed ParSeqHelper should be configured.
+     */
+    enum class RnsFgemmType {
+        BothParallel,
+        BothSequential,
+        ParallelRnsOnly,
+        ParallelFgemmOnly,
+    };
+
     /**
      * Holds everything a method needs to know about the problem.
      *
@@ -223,7 +234,8 @@ namespace LinBox {
                                                 //!  that the provided denominator is minimal.
 
         // ----- For DixonRNS method.
-        uint32_t primesCount = 16u; //!< How many primes to use lifting will be done over p = p1p2...pl.
+        uint32_t primesCount = 8u; //!< How many primes to use lifting will be done over p = p1p2...pl.
+        RnsFgemmType rnsFgemmType = RnsFgemmType::ParallelRnsOnly;
 
         // ----- For random-based systems.
         size_t trialsBeforeFailure = LINBOX_DEFAULT_TRIALS_BEFORE_FAILURE; //!< Maximum number of trials before giving up.
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index a30783798..862779717 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -97,14 +97,14 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul
 {
     std::cout << "Checking result..." << std::endl;
 
-    ResultVector RAx(RA.field(), Rb.size());
-    RA.apply(RAx, x);
+    // ResultVector RAx(RA.field(), Rb.size());
+    // RA.apply(RAx, x);
 
-    VectorDomain<typename ResultMatrix::Field> VD(RA.field());
-    if (!VD.areEqual(RAx, Rb)) {
-        print_error<SolveMethod>(x, A, b, "Ax != b");
-        return false;
-    }
+    // VectorDomain<typename ResultMatrix::Field> VD(RA.field());
+    // if (!VD.areEqual(RAx, Rb)) {
+    //     print_error<SolveMethod>(x, A, b, "Ax != b");
+    //     return false;
+    // }
 
     std::cout << "Result OK !" << std::endl;
 
@@ -218,6 +218,7 @@ int main(int argc, char** argv)
     int m = 32;
     int n = 24;
     std::string dispatchString = "Auto";
+    std::string rnsFgemmString = "ParallelRnsOnly";
 
     static Argument args[] = {
         {'q', "-q", "Field characteristic.", TYPE_INTEGER, &q},
@@ -230,6 +231,7 @@ int main(int argc, char** argv)
         {'m', "-m", "Row dimension of matrices.", TYPE_INT, &m},
         {'n', "-n", "Column dimension of matrices.", TYPE_INT, &n},
         {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP or Distributed).", TYPE_STR, &dispatchString},
+        {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &rnsFgemmString},
         END_OF_ARGUMENTS};
 
     parseArguments(argc, argv, args);
@@ -252,6 +254,19 @@ int main(int argc, char** argv)
         return EXIT_FAILURE;
     }
 
+    if (rnsFgemmString == "BothParallel")
+        method.rnsFgemmType = RnsFgemmType::BothParallel;
+    else if (rnsFgemmString == "BothSequential")
+        method.rnsFgemmType = RnsFgemmType::BothSequential;
+    else if (rnsFgemmString == "ParallelRnsOnly")
+        method.rnsFgemmType = RnsFgemmType::ParallelRnsOnly;
+    else if (rnsFgemmString == "ParallelFgemmOnly")
+        method.rnsFgemmType = RnsFgemmType::ParallelFgemmOnly;
+    else {
+        std::cerr << "-r RNS-FGEMM type should be either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly" << std::endl;
+        return EXIT_FAILURE;
+    }
+
     if (primesCount > 0) {
         method.primesCount = primesCount;
     }

From 62ce5b7af013cc57f4fa36574df3a1922f47b2eb Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 2 Jul 2019 11:02:26 +0200
Subject: [PATCH 45/63] Added arguments to benchmark-dense-solve.

---
 benchmarks/benchmark-dense-solve.C            |  8 +++
 .../algorithms/multi-mod-lifting-container.h  | 62 +++++++++----------
 2 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C
index 887a73667..d0f7c9015 100644
--- a/benchmarks/benchmark-dense-solve.C
+++ b/benchmarks/benchmark-dense-solve.C
@@ -53,6 +53,7 @@ namespace {
         int nbiter = 3;
         int n = 500;
         int bits = 10;
+        int primesCount = 8;
         std::string dispatchString = "Auto";
         std::string methodString = "Auto";
         std::string rnsFgemmString = "ParallelRnsOnly";
@@ -137,6 +138,8 @@ void benchmark(std::array<double, 3>& timebits, Arguments& args, MethodBase& met
 
 int main(int argc, char** argv)
 {
+    int numThreads = 1;
+
     Arguments args;
     Argument as[] = {{'i', "-i", "Set number of repetitions.", TYPE_INT, &args.nbiter},
                      {'q', "-q", "Set the field characteristic (-1 for rationals).", TYPE_INTEGER, &args.q},
@@ -144,6 +147,8 @@ int main(int argc, char** argv)
                      {'b', "-b", "bit size", TYPE_INT, &args.bits},
                      {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString},
                      {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString},
+                     {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &args.primesCount},
+		             {'t', "-t", "Number of threads.", TYPE_INT, &numThreads },
                      {'M', "-M",
                       "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, "
                       "Dixon, DixonRNS, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, "
@@ -154,6 +159,8 @@ int main(int argc, char** argv)
 
     commentator().setReportStream(std::cout);
 
+    omp_set_num_threads(numThreads);
+
     // Setting up context
 
     Communicator communicator(&argc, &argv);
@@ -163,6 +170,7 @@ int main(int argc, char** argv)
 
     MethodBase method;
     method.pCommunicator = &communicator;
+    method.primesCount = args.primesCount;
     if (args.dispatchString == "Sequential")        method.dispatch = Dispatch::Sequential;
     else if (args.dispatchString == "SMP")          method.dispatch = Dispatch::SMP;
     else if (args.dispatchString == "Distributed")  method.dispatch = Dispatch::Distributed;
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index d5a7b7a73..966c25c5b 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -340,17 +340,6 @@ namespace LinBox {
 
             // r <= Q + (R - A c) / p
 
-#define rns_fgemm(RnsParSeq, FgemmParSeq)                                                          \
-    using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RnsParSeq, FgemmParSeq>;             \
-    using MMHelper = FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic,                      \
-                                     FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;     \
-    ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS);                           \
-    MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);                                      \
-                                                                                                   \
-    FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n,      \
-                 _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, _rnsR,         \
-                 _primesCount, mmHelper);
-
             using RNSParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::RNSModulus,
                                                               FFLAS::StrategyParameter::Threads>;
             using FGEMMParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,
@@ -359,22 +348,20 @@ namespace LinBox {
             // @fixme @cpernet @jgdumas Should we move that PAR_BLOCK outside of the function
             // and let the user do it?
             // commentator().start("[MultiModLifting] FGEMM R <= R - Ac");
-            PAR_BLOCK
-            {
-                // Firstly compute R <= R - A c as a fgemm within the RNS domain.
-                if (_method.rnsFgemmType == RnsFgemmType::BothSequential) {
-                    rns_fgemm(FFLAS::ParSeqHelper::Sequential, FFLAS::ParSeqHelper::Sequential)
-                }
-                else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) {
-                    rns_fgemm(RNSParallel, FGEMMParallel)
-                }
-                else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) {
-                    rns_fgemm(FFLAS::ParSeqHelper::Sequential, FGEMMParallel)
-                }
-                else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) {
-                    rns_fgemm(RNSParallel, FFLAS::ParSeqHelper::Sequential)
-                }
+            // Firstly compute R <= R - A c as a fgemm within the RNS domain.
+            if (_method.rnsFgemmType == RnsFgemmType::BothSequential) {
+                rns_fgemm<FFLAS::ParSeqHelper::Sequential, FFLAS::ParSeqHelper::Sequential>();
+            }
+            else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) {
+                rns_fgemm<RNSParallel, FGEMMParallel>();
             }
+            else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) {
+                rns_fgemm<FFLAS::ParSeqHelper::Sequential, FGEMMParallel>();
+            }
+            else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) {
+                rns_fgemm<RNSParallel, FFLAS::ParSeqHelper::Sequential>();
+            }
+
             // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
 
             // We divide each residues by the according pj, which is done by multiplying.
@@ -424,14 +411,23 @@ namespace LinBox {
             }
         }
 
-        inline void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j)
+        // @note This allows us to factor out some of the rns fgemm variants common code.
+        template <class RnsParSeq, class FgemmParSeq>
+        inline void rns_fgemm()
         {
-            auto& Aij = A[i * lda + j];
-            Integer reconstructedInteger;
-            FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &reconstructedInteger, 1, A + (i * lda + j));
-            std::cout << i << " " << j << " ";
-            _rnsDomain->write(std::cout, Aij);
-            std::cout << " -> " << reconstructedInteger << std::endl;
+            PAR_BLOCK
+            {
+                using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose<RnsParSeq, FgemmParSeq>;
+                using MMHelper =
+                    FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic,
+                                    FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;
+                ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS);
+                MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);
+
+                FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n,
+                             _primesCount, _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount,
+                             _rnsDomain->one, _rnsR, _primesCount, mmHelper);
+            }
         }
 
     public: // @fixme BACK TO PRIVATE!

From 50b510d90858c9ab2393c1a006e9d17fe71f8959 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Wed, 3 Jul 2019 11:16:19 +0200
Subject: [PATCH 46/63] Added seed to benchmark-dense-solve

---
 benchmarks/benchmark-dense-solve.C | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C
index d0f7c9015..56c329de9 100644
--- a/benchmarks/benchmark-dense-solve.C
+++ b/benchmarks/benchmark-dense-solve.C
@@ -53,6 +53,7 @@ namespace {
         int nbiter = 3;
         int n = 500;
         int bits = 10;
+        int seed = -1;
         int primesCount = 8;
         std::string dispatchString = "Auto";
         std::string methodString = "Auto";
@@ -75,8 +76,8 @@ namespace {
 template <typename Field, typename Vector = DenseVector<Field>>
 void benchmark(std::array<double, 3>& timebits, Arguments& args, MethodBase& method)
 {
-    Field F(args.q);                                    // q is ignored for Integers
-    typename Field::RandIter randIter(F, 0, args.bits); // bits is ignored for ModularRandIter
+    Field F(args.q); // q is ignored for Integers
+    typename Field::RandIter randIter(F, args.seed, args.bits); // bits is ignored for ModularRandIter
 
 #ifdef _BENCHMARKS_DEBUG_
     std::clog << "Setting A ... " << std::endl;
@@ -145,6 +146,7 @@ int main(int argc, char** argv)
                      {'q', "-q", "Set the field characteristic (-1 for rationals).", TYPE_INTEGER, &args.q},
                      {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n},
                      {'b', "-b", "bit size", TYPE_INT, &args.bits},
+                     {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed},
                      {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString},
                      {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString},
                      {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &args.primesCount},
@@ -157,10 +159,12 @@ int main(int argc, char** argv)
                      END_OF_ARGUMENTS};
     LinBox::parseArguments(argc, argv, as);
 
-    commentator().setReportStream(std::cout);
-
     omp_set_num_threads(numThreads);
 
+    if (args.seed < 0) {
+        args.seed = time(nullptr);
+    }
+
     // Setting up context
 
     Communicator communicator(&argc, &argv);

From 24c3b14aa403b9b7edcd12403df13a42ea966752 Mon Sep 17 00:00:00 2001
From: ZHG <ZHG2011823@hotmail.com>
Date: Wed, 3 Jul 2019 16:48:56 +0200
Subject: [PATCH 47/63] Rewrite omp with paladin for
 multi-mod-lifting-container

---
 .../algorithms/multi-mod-lifting-container.h  | 102 +++++++++++++++++-
 1 file changed, 98 insertions(+), 4 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 966c25c5b..19c74dd43 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -174,19 +174,50 @@ namespace LinBox {
                 for (auto& F : _fields) {
                     _B.emplace_back(A, F);
                 }
-
+#if 0
 // @fixme To be replaced with Paladin
 #pragma omp parallel for
                 for (auto j = 0u; j < _primesCount; ++j) {
-                    int nullity = 0;
+                    int nullity = 0; //TODO: it may be necessary to replace nullity with a vector
                     auto& F = _fields[j];
                     BlasMatrixDomain<Field> bmd(F);
                     bmd.invin(_B[j], nullity);
+                    if (nullity > 0) {//TODO: it may be easier to move this condition check outside the parallel region => loop through the whole vector to add up all values as final value for the condition test which could be parallelized further more
+                        // @fixme Should redraw another prime!
+                        throw LinBoxError("Wrong prime, sorry.");
+                    }
+                }
+#else 
+
+                PAR_BLOCK{
+                    std::vector<int> vnullity(_primesCount, 0); // sized (not merely reserved): each task writes vnullity[j]
+                    auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads);
+                    int M = _primesCount;
+                    SYNCH_GROUP({
+                        FORBLOCK1D(iter, M, sp,
+                            TASK(MODE(CONSTREFERENCE(vnullity) ),
+                                for(auto j=iter.begin(); j!=iter.end(); ++j)
+                                {
+                                    auto& F = _fields[j];
+                                    BlasMatrixDomain<Field> bmd(F);
+                                    bmd.invin(_B[j], vnullity[j]);
+                                }
+                            )
+                        )
+                    });
+
+                    int nullity = 0; // sum of the per-prime values; any total > 0 is rejected below
+                    for (size_t i=0; i<_primesCount; ++i){
+                        nullity += vnullity[i];
+                    }
                     if (nullity > 0) {
                         // @fixme Should redraw another prime!
                         throw LinBoxError("Wrong prime, sorry.");
                     }
+                     
                 }
+#endif
+
             }
             // commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations");
 
@@ -304,6 +335,7 @@ namespace LinBox {
             BlasMatrixDomain<Ring> IMD(_ring);
 
 // commentator().start("[MultiModLifting] c = A^{-1} r mod p");
+#if 0
 #pragma omp parallel for
             for (auto j = 0u; j < _primesCount; ++j) {
                 auto pj = _primes[j];
@@ -334,6 +366,47 @@ namespace LinBox {
                     setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
                 }
             }
+#else
+            PAR_BLOCK{
+                auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads);
+                int M = _primesCount;
+                //SYNCH_GROUP({
+                FORBLOCK1D(iter, M, sp,
+                    TASK(MODE(CONSTREFERENCE(digits) ),{
+                        for(auto j=iter.begin(); j!=iter.end(); ++j) {
+                            auto pj = _primes[j];
+                            auto& FR = _FR[j];
+                            uint64_t upj = pj;
+
+                            // @note There is no VectorDomain::divmod yet.
+                            // Euclidian division so that rj = pj Qj + Rj
+                            uint64_t uR;
+                            for (auto i = 0u; i < _n; ++i) {
+                                Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj);
+                                // @note No need to init, because we know that uR < pj,
+                                // so that would do an extra check.
+                                FR[i] = static_cast<FElement>(uR);
+                            }
+
+                            // digit = A^{-1} * R mod pj
+                            auto& digit = digits[j];
+                            auto& B = _B[j];
+                            B.apply(digit, FR);
+
+                            // Store the very same result in an RNS system,
+                            // but fact is all the primes of the RNS system are bigger
+                            // than the modulus used to compute the digit, we just copy the result for
+                            // everybody.
+                            for (auto i = 0u; i < _n; ++i) {
+                                setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
+                                setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
+                            }
+                        }
+                    })
+                )
+                //});                     
+            }
+#endif
             // commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
 
             // ----- Compute the next residues!
@@ -372,7 +445,7 @@ namespace LinBox {
             auto rnsStride = 0u;
             for (auto h = 0u; h < _rnsPrimesCount; ++h) {
                 auto& rnsF = _rnsSystem->_field_rns[h];
-
+#if 0
 #pragma omp parallel for
                 for (auto j = 0u; j < _primesCount; ++j) {
                     auto& rnsPrimeInverse = _rnsPrimesInverses[j];
@@ -384,7 +457,28 @@ namespace LinBox {
                                    rnsPrimeInverseForRnsPrimeH);
                     }
                 }
-
+#else
+                PAR_BLOCK{
+
+                    auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads);
+                    int M = _primesCount;
+                    //SYNCH_GROUP({
+                    FORBLOCK1D(iter, M, sp,
+                        TASK(MODE(CONSTREFERENCE(digits) ),{
+                            for(auto j=iter.begin(); j!=iter.end(); ++j) {
+                                auto& rnsPrimeInverse = _rnsPrimesInverses[j];
+                                auto stridePrimeInverse = rnsPrimeInverse._stride;
+                                auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse];
+
+                                for (auto i = 0u; i < _n; ++i) {
+                                    rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)],
+                                               rnsPrimeInverseForRnsPrimeH);
+                                }
+                            }
+                        })
+                    );
+                }
+#endif
                 rnsStride += _rnsR._stride;
             }
             // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");

From 287fb66ff355beb292b1ac5c6511a1d9f9c00d79 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 12 Jul 2019 10:28:59 +0200
Subject: [PATCH 48/63] OK

---
 tests/test-solve-full.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index 862779717..3ebe444ae 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -310,7 +310,7 @@ int main(int argc, char** argv)
         // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 
         // // ----- Rational Dixon
-        ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // // @fixme Dixon<Wiedemann> does not compile
         // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);

From df893214ccbf8fac541f5af4cc31b5df8c52a7e6 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Thu, 25 Jul 2019 18:06:24 +0200
Subject: [PATCH 49/63] Fixed some wrong results in DixonRNS

---
 .../algorithms/multi-mod-lifting-container.h  | 166 ++++++------------
 .../rational-cra-builder-full-multip.h        |   3 +
 linbox/solutions/hadamard-bound.h             |   5 +-
 linbox/solutions/methods.h                    |   4 +-
 linbox/solutions/solve/solve-dixon-rns.h      |   8 +-
 tests/test-solve-full.C                       |  16 +-
 6 files changed, 76 insertions(+), 126 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 19c74dd43..0b39457d9 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -105,6 +105,11 @@ namespace LinBox {
 
             // This will contain the primes or our MultiMod basis
             _primesCount = m.primesCount;
+            if (_primesCount == -1u) {
+                PAR_BLOCK { _primesCount = 6 * NUM_THREADS; }
+            }
+            std::cout << _primesCount << std::endl;
+
             _primes.resize(_primesCount);
 
             // Some preparation work
@@ -174,50 +179,27 @@ namespace LinBox {
                 for (auto& F : _fields) {
                     _B.emplace_back(A, F);
                 }
-#if 0
-// @fixme To be replaced with Paladin
-#pragma omp parallel for
-                for (auto j = 0u; j < _primesCount; ++j) {
-                    int nullity = 0; //TODO: it may be necessary to replace nullity with a vector
-                    auto& F = _fields[j];
-                    BlasMatrixDomain<Field> bmd(F);
-                    bmd.invin(_B[j], nullity);
-                    if (nullity > 0) {//TODO: it may be easier to move this condition check outside the parallel region => loop through the whole vector to add up all values as final value for the condition test which could be parallelized further more
-                        // @fixme Should redraw another prime!
-                        throw LinBoxError("Wrong prime, sorry.");
-                    }
-                }
-#else 
 
-                PAR_BLOCK{
-                    std::vector<int> vnullity;vnullity.reserve(_primesCount);
-                    auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads);
+                PAR_BLOCK
+                {
+                    std::vector<int> nullities(_primesCount);
+                    auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
+                                       FFLAS::StrategyParameter::Threads);
                     int M = _primesCount;
-                    SYNCH_GROUP({
-                        FORBLOCK1D(iter, M, sp,
-                            TASK(MODE(CONSTREFERENCE(vnullity) ),
-                                for(auto j=iter.begin(); j!=iter.end(); ++j)
-                                {
-                                    auto& F = _fields[j];
-                                    BlasMatrixDomain<Field> bmd(F);
-                                    bmd.invin(_B[j], vnullity[j]);
-                                }
-                            )
-                        )
+                    FOR1D(j, M, sp, MODE(CONSTREFERENCE(nullities)), {
+                        auto& F = _fields[j];
+                        BlasMatrixDomain<Field> bmd(F);
+                        bmd.invin(_B[j], nullities[j]);
                     });
 
-                    int nullity = 0;
-                    for (size_t i=0; i<_primesCount; ++i){
-                        nullity += vnullity[i];
-                    }
-                    if (nullity > 0) {
-                        // @fixme Should redraw another prime!
-                        throw LinBoxError("Wrong prime, sorry.");
+                    for (auto nullity : nullities) {
+                        if (nullity > 0) {
+                            // @fixme Should redraw another prime!
+                            std::cout << "----------------------------- NULLITY" << std::endl;
+                            throw LinBoxError("Wrong prime, sorry.");
+                        }
                     }
-                     
                 }
-#endif
-
             }
             // commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations");
 
@@ -335,45 +317,15 @@ namespace LinBox {
             BlasMatrixDomain<Ring> IMD(_ring);
 
 // commentator().start("[MultiModLifting] c = A^{-1} r mod p");
-#if 0
-#pragma omp parallel for
-            for (auto j = 0u; j < _primesCount; ++j) {
-                auto pj = _primes[j];
-                auto& FR = _FR[j];
-                uint64_t upj = pj;
-
-                // @note There is no VectorDomain::divmod yet.
-                // Euclidian division so that rj = pj Qj + Rj
-                uint64_t uR;
-                for (auto i = 0u; i < _n; ++i) {
-                    Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj);
-                    // @note No need to init, because we know that uR < pj,
-                    // so that would do an extra check.
-                    FR[i] = static_cast<FElement>(uR);
-                }
-
-                // digit = A^{-1} * R mod pj
-                auto& digit = digits[j];
-                auto& B = _B[j];
-                B.apply(digit, FR);
-
-                // Store the very same result in an RNS system,
-                // but fact is all the primes of the RNS system are bigger
-                // than the modulus used to compute the digit, we just copy the result for
-                // everybody.
-                for (auto i = 0u; i < _n; ++i) {
-                    setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
-                    setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
-                }
-            }
-#else
-            PAR_BLOCK{
-                auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads);
+            PAR_BLOCK
+            {
+                auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
+                                   FFLAS::StrategyParameter::Threads);
                 int M = _primesCount;
-                //SYNCH_GROUP({
-                FORBLOCK1D(iter, M, sp,
-                    TASK(MODE(CONSTREFERENCE(digits) ),{
-                        for(auto j=iter.begin(); j!=iter.end(); ++j) {
+                SYNCH_GROUP({
+                FORBLOCK1D(
+                    iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), {
+                        for (auto j = iter.begin(); j != iter.end(); ++j) {
                             auto pj = _primes[j];
                             auto& FR = _FR[j];
                             uint64_t upj = pj;
@@ -382,7 +334,8 @@ namespace LinBox {
                             // Euclidian division so that rj = pj Qj + Rj
                             uint64_t uR;
                             for (auto i = 0u; i < _n; ++i) {
-                                Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj);
+                                Integer::divmod(_qMatrix.refEntry(i, j), uR,
+                                                _rMatrix.getEntry(i, j), upj);
                                 // @note No need to init, because we know that uR < pj,
                                 // so that would do an extra check.
                                 FR[i] = static_cast<FElement>(uR);
@@ -395,18 +348,16 @@ namespace LinBox {
 
                             // Store the very same result in an RNS system,
                             // but fact is all the primes of the RNS system are bigger
-                            // than the modulus used to compute the digit, we just copy the result for
-                            // everybody.
+                            // than the modulus used to compute the digit, we just copy
+                            // the result for everybody.
                             for (auto i = 0u; i < _n; ++i) {
                                 setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
                                 setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
                             }
                         }
-                    })
-                )
-                //});                     
+                    }))
+                });
             }
-#endif
             // commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
 
             // ----- Compute the next residues!
@@ -445,40 +396,29 @@ namespace LinBox {
             auto rnsStride = 0u;
             for (auto h = 0u; h < _rnsPrimesCount; ++h) {
                 auto& rnsF = _rnsSystem->_field_rns[h];
-#if 0
-#pragma omp parallel for
-                for (auto j = 0u; j < _primesCount; ++j) {
-                    auto& rnsPrimeInverse = _rnsPrimesInverses[j];
-                    auto stridePrimeInverse = rnsPrimeInverse._stride;
-                    auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse];
-
-                    for (auto i = 0u; i < _n; ++i) {
-                        rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)],
-                                   rnsPrimeInverseForRnsPrimeH);
-                    }
-                }
-#else
-                PAR_BLOCK{
 
-                    auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads);
+                PAR_BLOCK
+                {
+                    auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
+                                       FFLAS::StrategyParameter::Threads);
                     int M = _primesCount;
-                    //SYNCH_GROUP({
-                    FORBLOCK1D(iter, M, sp,
-                        TASK(MODE(CONSTREFERENCE(digits) ),{
-                            for(auto j=iter.begin(); j!=iter.end(); ++j) {
-                                auto& rnsPrimeInverse = _rnsPrimesInverses[j];
-                                auto stridePrimeInverse = rnsPrimeInverse._stride;
-                                auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse];
-
-                                for (auto i = 0u; i < _n; ++i) {
-                                    rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)],
+                    SYNCH_GROUP({
+                    FORBLOCK1D(iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), {
+                                   for (auto j = iter.begin(); j != iter.end(); ++j) {
+                                       auto& rnsPrimeInverse = _rnsPrimesInverses[j];
+                                       auto stridePrimeInverse = rnsPrimeInverse._stride;
+                                       auto rnsPrimeInverseForRnsPrimeH =
+                                           rnsPrimeInverse._ptr[h * stridePrimeInverse];
+
+                                       for (auto i = 0u; i < _n; ++i) {
+                                           rnsF.mulin(
+                                               _rnsR._ptr[rnsStride + (i * _primesCount + j)],
                                                rnsPrimeInverseForRnsPrimeH);
-                                }
-                            }
-                        })
-                    );
+                                       }
+                                   }
+                               }))});
                 }
-#endif
+
                 rnsStride += _rnsR._stride;
             }
             // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p");
diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h
index f5dc17f92..c1b03c06f 100644
--- a/linbox/algorithms/rational-cra-builder-full-multip.h
+++ b/linbox/algorithms/rational-cra-builder-full-multip.h
@@ -70,11 +70,14 @@ namespace LinBox
 		{
             commentator().start("[RationalCRABuilderFullMultip] CRT Reconstruction");
             Father_t::result(num, false);
+            std::cout << "num[0]: " << num[0] << std::endl;
+            std::cout << "numBound: " << numBound << std::endl;
             commentator().stop("[RationalCRABuilderFullMultip] CRT Reconstruction");
 
             commentator().start("[RationalCRABuilderFullMultip] Rational Reconstruction");
             den = 1;
             const auto& mod = Father_t::getModulus();
+            std::cout << "mod: " << mod << std::endl;
             Integer nd;
             for (auto num_it = num.begin(); num_it != num.end(); ++num_it) {
                 iterativeratrecon(*num_it, nd, den, mod, numBound);
diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h
index 00fe6e92e..ba00bc071 100644
--- a/linbox/solutions/hadamard-bound.h
+++ b/linbox/solutions/hadamard-bound.h
@@ -446,8 +446,9 @@ namespace LinBox {
             bNorm += 1;
         }
 
-        data.denBound = hadamardBound.bound;
-        data.numBound = hadamardBound.boundOverMinNorm * bNorm;
+        // @note RR expects the bounds to be strict, this is why we add a + 1
+        data.denBound = hadamardBound.bound + 1;
+        data.numBound = hadamardBound.boundOverMinNorm * bNorm + 1;
         if (data.denBound == 0 || data.numBound == 0) {
             data.solutionLogBound = 0.0;
         }
diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h
index 5222050b6..63b7af584 100644
--- a/linbox/solutions/methods.h
+++ b/linbox/solutions/methods.h
@@ -234,7 +234,9 @@ namespace LinBox {
                                                 //!  that the provided denominator is minimal.
 
         // ----- For DixonRNS method.
-        uint32_t primesCount = 8u; //!< How many primes to use lifting will be done over p = p1p2...pl.
+        //! How many primes to use lifting will be done over p = p1p2...pl.
+        //! -1 means automatically set to a heuristic value.
+        uint32_t primesCount = -1u;
         RnsFgemmType rnsFgemmType = RnsFgemmType::ParallelRnsOnly;
 
         // ----- For random-based systems.
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 388fb5e78..d153765c2 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -89,6 +89,9 @@ namespace LinBox {
                     _lc.ring().mulin(radices[j], _lc.prime(j));
                 }
             }
+            for (auto j = 0u; j < _lc.primesCount(); ++j) {
+                std::cout << "radices[" << j << "] " << radices[j] << std::endl;
+            }
             commentator().stop("[MultiModLifting] Lifting");
 
             // CRT reconstruction from paddicAccumulations
@@ -109,8 +112,9 @@ namespace LinBox {
             commentator().stop("[MultiModLifting] CRT Reconstruction Progress");
 
             // Rational reconstruction
-            // @note RR expects the bounds to be strict, this is why we add a + 1
-            craBuilder.result(xNum, xDen, _lc.numBound() + 1);
+            craBuilder.result(xNum, xDen, _lc.numBound());
+            std::cout << "xNum[0] " << xNum[0] << std::endl;
+            std::cout << "xDen " << xDen << std::endl;
 
             return true;
         }
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index 3ebe444ae..af17eef6c 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -97,14 +97,14 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul
 {
     std::cout << "Checking result..." << std::endl;
 
-    // ResultVector RAx(RA.field(), Rb.size());
-    // RA.apply(RAx, x);
-
-    // VectorDomain<typename ResultMatrix::Field> VD(RA.field());
-    // if (!VD.areEqual(RAx, Rb)) {
-    //     print_error<SolveMethod>(x, A, b, "Ax != b");
-    //     return false;
-    // }
+    ResultVector RAx(RA.field(), Rb.size());
+    RA.apply(RAx, x);
+
+    VectorDomain<typename ResultMatrix::Field> VD(RA.field());
+    if (!VD.areEqual(RAx, Rb)) {
+        print_error<SolveMethod>(x, A, b, "Ax != b");
+        return false;
+    }
 
     std::cout << "Result OK !" << std::endl;
 

From e840f25193aaa4da4a45508968b13259ea424c0c Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Mon, 5 Aug 2019 10:45:25 +0200
Subject: [PATCH 50/63] Clean up and last fixes

---
 .../algorithms/multi-mod-lifting-container.h   |  3 +--
 .../rational-cra-builder-full-multip.h         |  3 ---
 linbox/solutions/solve/solve-dixon-rns.h       | 18 ++++++------------
 tests/test-solve-full.C                        | 14 +++++++++-----
 4 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 0b39457d9..04a60d0e3 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -108,7 +108,6 @@ namespace LinBox {
             if (_primesCount == -1u) {
                 PAR_BLOCK { _primesCount = 6 * NUM_THREADS; }
             }
-            std::cout << _primesCount << std::endl;
 
             _primes.resize(_primesCount);
 
@@ -248,7 +247,7 @@ namespace LinBox {
 
                 // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...)
                 _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct);
-                std::cout << "_iterationsCount " << _iterationsCount << std::endl;
+                // std::cout << "_iterationsCount " << _iterationsCount << std::endl;
             }
 
             //----- Locals setup
diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h
index c1b03c06f..f5dc17f92 100644
--- a/linbox/algorithms/rational-cra-builder-full-multip.h
+++ b/linbox/algorithms/rational-cra-builder-full-multip.h
@@ -70,14 +70,11 @@ namespace LinBox
 		{
             commentator().start("[RationalCRABuilderFullMultip] CRT Reconstruction");
             Father_t::result(num, false);
-            std::cout << "num[0]: " << num[0] << std::endl;
-            std::cout << "numBound: " << numBound << std::endl;
             commentator().stop("[RationalCRABuilderFullMultip] CRT Reconstruction");
 
             commentator().start("[RationalCRABuilderFullMultip] Rational Reconstruction");
             den = 1;
             const auto& mod = Father_t::getModulus();
-            std::cout << "mod: " << mod << std::endl;
             Integer nd;
             for (auto num_it = num.begin(); num_it != num.end(); ++num_it) {
                 iterativeratrecon(*num_it, nd, den, mod, numBound);
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index d153765c2..5d6cd798f 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -80,7 +80,7 @@ namespace LinBox {
             for (auto i = 0u; i < _lc.length(); ++i) {
                 _lc.next(digits);
 
-                #pragma omp parallel for
+#pragma omp parallel for
                 for (auto j = 0u; j < _lc.primesCount(); ++j) {
                     // @fixme @cpernet digits being a field vector, this will implicitly cast
                     // each of its elements to a Integer, is there something better?
@@ -89,16 +89,12 @@ namespace LinBox {
                     _lc.ring().mulin(radices[j], _lc.prime(j));
                 }
             }
-            for (auto j = 0u; j < _lc.primesCount(); ++j) {
-                std::cout << "radices[" << j << "] " << radices[j] << std::endl;
-            }
             commentator().stop("[MultiModLifting] Lifting");
 
             // CRT reconstruction from paddicAccumulations
             commentator().start("[MultiModLifting] CRT Reconstruction Progress");
             using CRAField = Givaro::Modular<Integer>;
-            RationalCRABuilderFullMultip<CRAField> craBuilder(_lc.log2Bound()
-                                                              / 1.4427); // 1.4427 = 1 / log(2)
+            RationalCRABuilderFullMultip<CRAField> craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2)
 
             {
                 CRAField field(radices[0]);
@@ -113,8 +109,6 @@ namespace LinBox {
 
             // Rational reconstruction
             craBuilder.result(xNum, xDen, _lc.numBound());
-            std::cout << "xNum[0] " << xNum[0] << std::endl;
-            std::cout << "xDen " << xDen << std::endl;
 
             return true;
         }
@@ -138,8 +132,8 @@ namespace LinBox {
          * Dense solving.
          */
         template <class RVector, class Vector>
-        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
-                   const Vector& b, const Method::DixonRNS& m)
+        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
+                   const Method::DixonRNS& m)
         {
             // @fixme We should use some code from DixonSolver...
             // But that's hard so we just assume that A is square and invertible.
@@ -166,8 +160,8 @@ namespace LinBox {
      * \brief Solve specialisation for DixonRNS on dense matrices.
      */
     template <class RVector, class Ring, class Vector>
-    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A,
-               const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
+    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
+               const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
     {
         commentator().start("solve.dixon-rns.integer.dense");
 
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index af17eef6c..d9f825b26 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -93,9 +93,11 @@ namespace {
 }
 
 template <class SolveMethod, class Matrix, class Vector, class ResultMatrix, class ResultVector>
-bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb)
+bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb, bool verbose)
 {
-    std::cout << "Checking result..." << std::endl;
+    if (verbose) {
+        std::cout << "Checking result..." << std::endl;
+    }
 
     ResultVector RAx(RA.field(), Rb.size());
     RA.apply(RAx, x);
@@ -106,7 +108,9 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul
         return false;
     }
 
-    std::cout << "Result OK !" << std::endl;
+    if (verbose) {
+        std::cout << "Result OK !" << std::endl;
+    }
 
     return true;
 }
@@ -145,11 +149,11 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R
     bool ok = true;
     try {
         solve(x, A, b, method);
-        ok = check_result<SolveMethod>(x, A, b, RA, Rb);
+        ok = check_result<SolveMethod>(x, A, b, RA, Rb, verbose);
 
         // if (ok) {
         //     solveInPlace(x, A, b, method);
-        //     ok = check_result<SolveMethod>(x, A, b, RA, Rb);
+        //     ok = check_result<SolveMethod>(x, A, b, RA, Rb, verbose);
         // }
     } catch (...) {
         print_error<SolveMethod>(x, A, b, "throws error");

From afc509d5ae10a9bf6f11400dc90e7c7523323168 Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Wed, 14 Aug 2019 10:00:34 +0200
Subject: [PATCH 51/63] Using FOR1D directly. Still wrong results!

---
 .../dixon-solver/dixon-solver-dense.inl       |  26 +++-
 linbox/algorithms/lifting-container.h         |   2 +-
 .../algorithms/multi-mod-lifting-container.h  | 115 +++++++++--------
 .../multi-mod-rational-reconstruction.h       | 117 ++++++++++++++++++
 linbox/solutions/solve/solve-dixon-rns.h      |  98 +--------------
 tests/test-solve-full.C                       |   2 +-
 6 files changed, 201 insertions(+), 159 deletions(-)
 create mode 100644 linbox/algorithms/multi-mod-rational-reconstruction.h

diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
index ed0978683..c6a2a81b3 100644
--- a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
+++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
@@ -24,8 +24,10 @@
 #include "linbox/util/debug.h"
 
 #include "linbox/algorithms/lifting-container.h"
+#include "linbox/algorithms/multi-mod-lifting-container.h"
 #include "linbox/algorithms/matrix-inverse.h"
 #include "linbox/algorithms/rational-reconstruction.h"
+#include "linbox/algorithms/multi-mod-rational-reconstruction.h"
 
 namespace LinBox {
 
@@ -128,15 +130,27 @@ namespace LinBox {
             }
         } while (notfr);
 
-        typedef DixonLiftingContainer<Ring, Field, IMatrix, BlasMatrix<Field>> LiftingContainer;
-        commentator().start("CLASSIC DIXON LIFTING");
-        LiftingContainer lc(_ring, *F, A, *FMP, b, _prime);
-        RationalReconstruction<LiftingContainer> re(lc);
-        if (!re.getRational(num, den, 0)) {
+        // commentator().start("CLASSIC DIXON LIFTING");
+        // typedef DixonLiftingContainer<Ring, Field, IMatrix, BlasMatrix<Field>> LiftingContainer;
+        // LiftingContainer lc(_ring, *F, A, *FMP, b, _prime);
+        // RationalReconstruction<LiftingContainer> re(lc);
+        // if (!re.getRational(num, den, 0)) {
+        //     delete FMP;
+        //     return SS_FAILED;
+        // }
+        // commentator().stop("CLASSIC DIXON LIFTING");
+
+        commentator().start("MULTI MOD DIXON LIFTING");
+        using LiftingContainer = MultiModLiftingContainer<Field, Ring, RandomPrime>;
+        Method::DixonRNS m; // @fixme Get from?
+        LiftingContainer lc(_ring, _genprime, A, b, m);
+        MultiModRationalReconstruction<LiftingContainer> re(lc);
+        if (!re.getRational(num, den)) {
             delete FMP;
             return SS_FAILED;
         }
-        commentator().stop("CLASSIC DIXON LIFTING");
+        commentator().stop("MULTI MOD DIXON LIFTING");
+
 #ifdef RSTIMING
         ttNonsingularSolve.update(re, lc);
 #endif
diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h
index 9664f75f4..2a86bafa6 100644
--- a/linbox/algorithms/lifting-container.h
+++ b/linbox/algorithms/lifting-container.h
@@ -162,8 +162,8 @@ namespace LinBox
             // _length = logp(L, Prime) = log2(L) * ln(2) / ln(Prime)
             double primeLog2 = Givaro::logtwo(Prime);
             _length = std::ceil(hb.solutionLogBound / primeLog2); // round up instead of down
-			std::cout << "_length "<< _length << std::endl;
 #ifdef DEBUG_LC
+			std::cout << "_length "<< _length << std::endl;
 			std::cout<<" norms computed, p = "<<_p<<"\n";
 			std::cout<<" N = "<<N<<", D = "<<D<<", length = "<<_length<<"\n";
 			_matA.write(std::cout<<"A:=", Tag::FileFormat::Maple) << std::endl;
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 04a60d0e3..a7b1b12fc 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -99,9 +99,9 @@ namespace LinBox {
         {
             linbox_check(A.rowdim() == A.coldim());
 
-            // std::cout << "----------" << std::endl;
-            // A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
-            // std::cout << "b: " << b << std::endl;
+            std::cout << "----------" << std::endl;
+            A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
+            std::cout << "b: " << b << std::endl;
 
             // This will contain the primes or our MultiMod basis
             _primesCount = m.primesCount;
@@ -109,6 +109,8 @@ namespace LinBox {
                 PAR_BLOCK { _primesCount = 6 * NUM_THREADS; }
             }
 
+            // @fixme !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            _primesCount = 2;
             _primes.resize(_primesCount);
 
             // Some preparation work
@@ -122,8 +124,9 @@ namespace LinBox {
                 double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n));
                 _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1));
                 _rnsPrimes.resize(_rnsPrimesCount);
-                // std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
+                std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
 
+                auto trialsLeft = m.trialsBeforeFailure;
                 std::vector<double> primes;
                 for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) {
                     auto p = *primeGenerator;
@@ -131,14 +134,20 @@ namespace LinBox {
 
                     // @note std::lower_bound finds the iterator where to put p in the sorted
                     // container. The name of the routine might be strange, but, hey, that's not my
-                    // fault.
+                    // fault. We check if the prime is already listed.
                     auto lb = std::lower_bound(primes.begin(), primes.end(), p);
                     if (lb != primes.end() && *lb == p) {
+                        if (trialsLeft == 0) {
+                            throw LinboxError("[MultiModLiftingContainer] Not enough primes.");
+                        }
+
                         --j;
+                        --trialsLeft;
                         continue;
                     }
 
                     // Inserting the primes at the right place to keep the array sorted
+                    std::cout << "Adding " << Integer(p) << std::endl;
                     primes.insert(lb, p);
                 }
 
@@ -165,6 +174,7 @@ namespace LinBox {
             // Setting fields up
             for (auto& pj : _primes) {
                 _fields.emplace_back(pj);
+                std::cout << Integer(pj) << std::endl;
             }
 
             // Initialize all inverses
@@ -247,7 +257,7 @@ namespace LinBox {
 
                 // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...)
                 _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct);
-                // std::cout << "_iterationsCount " << _iterationsCount << std::endl;
+                std::cout << "_iterationsCount " << _iterationsCount << std::endl;
             }
 
             //----- Locals setup
@@ -321,40 +331,35 @@ namespace LinBox {
                 auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
                                    FFLAS::StrategyParameter::Threads);
                 int M = _primesCount;
-                SYNCH_GROUP({
-                FORBLOCK1D(
-                    iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), {
-                        for (auto j = iter.begin(); j != iter.end(); ++j) {
-                            auto pj = _primes[j];
-                            auto& FR = _FR[j];
-                            uint64_t upj = pj;
-
-                            // @note There is no VectorDomain::divmod yet.
-                            // Euclidian division so that rj = pj Qj + Rj
-                            uint64_t uR;
-                            for (auto i = 0u; i < _n; ++i) {
-                                Integer::divmod(_qMatrix.refEntry(i, j), uR,
-                                                _rMatrix.getEntry(i, j), upj);
-                                // @note No need to init, because we know that uR < pj,
-                                // so that would do an extra check.
-                                FR[i] = static_cast<FElement>(uR);
-                            }
-
-                            // digit = A^{-1} * R mod pj
-                            auto& digit = digits[j];
-                            auto& B = _B[j];
-                            B.apply(digit, FR);
-
-                            // Store the very same result in an RNS system,
-                            // but fact is all the primes of the RNS system are bigger
-                            // than the modulus used to compute the digit, we just copy
-                            // the result for everybody.
-                            for (auto i = 0u; i < _n; ++i) {
-                                setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
-                                setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
-                            }
-                        }
-                    }))
+                FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), {
+                    auto pj = _primes[j];
+                    auto& FR = _FR[j];
+                    uint64_t upj = pj;
+
+                    // @note There is no VectorDomain::divmod yet.
+                    // Euclidian division so that rj = pj Qj + Rj
+                    uint64_t uR;
+                    for (auto i = 0u; i < _n; ++i) {
+                        Integer::divmod(_qMatrix.refEntry(i, j), uR,
+                                        _rMatrix.getEntry(i, j), upj);
+                        // @note No need to init, because we know that uR < pj,
+                        // so that would do an extra check.
+                        FR[i] = static_cast<FElement>(uR);
+                    }
+
+                    // digit = A^{-1} * R mod pj
+                    auto& digit = digits[j];
+                    auto& B = _B[j];
+                    B.apply(digit, FR);
+
+                    // Store the very same result in an RNS system,
+                    // but fact is all the primes of the RNS system are bigger
+                    // than the modulus used to compute the digit, we just copy
+                    // the result for everybody.
+                    for (auto i = 0u; i < _n; ++i) {
+                        setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]);
+                        setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]);
+                    }
                 });
             }
             // commentator().stop("[MultiModLifting] c = A^{-1} r mod p");
@@ -368,8 +373,6 @@ namespace LinBox {
             using FGEMMParallel = FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,
                                                                 FFLAS::StrategyParameter::Threads>;
 
-            // @fixme @cpernet @jgdumas Should we move that PAR_BLOCK outside of the function
-            // and let the user do it?
             // commentator().start("[MultiModLifting] FGEMM R <= R - Ac");
             // Firstly compute R <= R - A c as a fgemm within the RNS domain.
             if (_method.rnsFgemmType == RnsFgemmType::BothSequential) {
@@ -384,7 +387,6 @@ namespace LinBox {
             else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) {
                 rns_fgemm<RNSParallel, FFLAS::ParSeqHelper::Sequential>();
             }
-
             // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
 
             // We divide each residues by the according pj, which is done by multiplying.
@@ -401,21 +403,18 @@ namespace LinBox {
                     auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
                                        FFLAS::StrategyParameter::Threads);
                     int M = _primesCount;
-                    SYNCH_GROUP({
-                    FORBLOCK1D(iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), {
-                                   for (auto j = iter.begin(); j != iter.end(); ++j) {
-                                       auto& rnsPrimeInverse = _rnsPrimesInverses[j];
-                                       auto stridePrimeInverse = rnsPrimeInverse._stride;
-                                       auto rnsPrimeInverseForRnsPrimeH =
-                                           rnsPrimeInverse._ptr[h * stridePrimeInverse];
-
-                                       for (auto i = 0u; i < _n; ++i) {
-                                           rnsF.mulin(
-                                               _rnsR._ptr[rnsStride + (i * _primesCount + j)],
-                                               rnsPrimeInverseForRnsPrimeH);
-                                       }
-                                   }
-                               }))});
+                    FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), {
+                        auto& rnsPrimeInverse = _rnsPrimesInverses[j];
+                        auto stridePrimeInverse = rnsPrimeInverse._stride;
+                        auto rnsPrimeInverseForRnsPrimeH =
+                            rnsPrimeInverse._ptr[h * stridePrimeInverse];
+
+                        for (auto i = 0u; i < _n; ++i) {
+                            rnsF.mulin(
+                                _rnsR._ptr[rnsStride + (i * _primesCount + j)],
+                                rnsPrimeInverseForRnsPrimeH);
+                        }
+                    });
                 }
 
                 rnsStride += _rnsR._stride;
diff --git a/linbox/algorithms/multi-mod-rational-reconstruction.h b/linbox/algorithms/multi-mod-rational-reconstruction.h
new file mode 100644
index 000000000..6fae6a357
--- /dev/null
+++ b/linbox/algorithms/multi-mod-rational-reconstruction.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2019 LinBox Team
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the  GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * ========LICENCE========
+ */
+
+#pragma once
+
+namespace LinBox {
+    /**
+     * From a MultiModLiftingContainer, will build
+     * the solution on each prime, then will do a CRT reconstruction,
+     * before reconstructing the rational.
+     *
+     * This does not do early termination.
+     */
+    template <class LiftingContainer>
+    class MultiModRationalReconstruction {
+        using Ring = typename LiftingContainer::Ring;
+        using IElement = typename LiftingContainer::IElement;
+        using IVector = typename LiftingContainer::IVector;
+        using FElement = typename LiftingContainer::FElement;
+        using FVector = typename LiftingContainer::FVector;
+
+    public:
+        MultiModRationalReconstruction(LiftingContainer& lc)
+            : _lc(lc)
+        {
+        }
+
+        bool getRational(IVector& xNum, IElement& xDen)
+        {
+            // Early out on a zero numerator bound; xNum spans size() entries, not length() lifting steps.
+            if (_lc.numBound() == 0) {
+                for (auto i = 0u; i < _lc.size(); ++i) {
+                    _lc.ring().assign(xNum[i], _lc.ring().zero);
+                }
+                _lc.ring().assign(xDen, _lc.ring().one);
+                return true;
+            }
+
+            commentator().start("[MultiModLifting] Lifting");
+
+            // Temporary structure to store a ci for each pj
+            std::vector<FVector> digits;
+            digits.reserve(_lc.primesCount());
+            for (auto& F : _lc.primesFields()) {
+                digits.emplace_back(F, _lc.size());
+            }
+
+            // The pj^i for each pj
+            std::vector<IElement> radices(_lc.primesCount(), 1);
+
+            // Stores each c0 + c1 pj + ... + ck pj^k for each pj
+            std::vector<IVector> padicAccumulations(_lc.primesCount(), _lc.ring());
+            for (auto j = 0u; j < _lc.primesCount(); ++j) {
+                padicAccumulations[j].resize(_lc.size());
+            }
+
+            // @fixme Better use PolEval (or will it cause memory explosion?)
+            VectorDomain<Ring> IVD(_lc.ring());
+            for (auto i = 0u; i < _lc.length(); ++i) {
+                _lc.next(digits);
+
+				#pragma omp parallel for
+                for (auto j = 0u; j < _lc.primesCount(); ++j) {
+                    // @fixme @cpernet digits being a field vector, this will implicitly cast
+                    // each of its elements to a Integer, is there something better?
+                    // Or else, we just need an overload of Givaro::ZRing().axpyin() with a double as last parameter
+                    IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
+                    _lc.ring().mulin(radices[j], _lc.prime(j));
+                }
+            }
+            commentator().stop("[MultiModLifting] Lifting");
+
+            // CRT reconstruction from paddicAccumulations
+            commentator().start("[MultiModLifting] CRT Reconstruction Progress");
+            using CRAField = Givaro::Modular<Integer>;
+            RationalCRABuilderFullMultip<CRAField> craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2)
+
+            {
+                CRAField field(radices[0]);
+                craBuilder.initialize(field, padicAccumulations[0]);
+            }
+
+            for (auto j = 1u; j < _lc.primesCount(); ++j) {
+                CRAField field(radices[j]);
+                craBuilder.progress(field, padicAccumulations[j]);
+            }
+            commentator().stop("[MultiModLifting] CRT Reconstruction Progress");
+
+            // Rational reconstruction
+            craBuilder.result(xNum, xDen, _lc.numBound());
+
+            return true;
+        }
+
+    private:
+        LiftingContainer& _lc;
+    };
+}
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
index 5d6cd798f..89dfb5cbd 100644
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ b/linbox/solutions/solve/solve-dixon-rns.h
@@ -23,101 +23,9 @@
 #pragma once
 
 #include <linbox/algorithms/multi-mod-lifting-container.h>
+#include <linbox/algorithms/multi-mod-rational-reconstruction.h>
 
 namespace LinBox {
-    /**
-     * From a MultiModLiftingContainer, will build
-     * the solution on each prime, then will do a CRT reconstruction,
-     * before reconstructing the rational.
-     *
-     * This does not do early termination.
-     */
-    template <class LiftingContainer>
-    class MultiModRationalReconstruction {
-        using Ring = typename LiftingContainer::Ring;
-        using IElement = typename LiftingContainer::IElement;
-        using IVector = typename LiftingContainer::IVector;
-        using FElement = typename LiftingContainer::FElement;
-        using FVector = typename LiftingContainer::FVector;
-
-    public:
-        MultiModRationalReconstruction(LiftingContainer& lc)
-            : _lc(lc)
-        {
-        }
-
-        bool getRational(IVector& xNum, IElement& xDen)
-        {
-            // Early out when the numerator is bounded by zero.
-            if (_lc.numBound() == 0) {
-                for (auto i = 0u; i < _lc.length(); ++i) {
-                    _lc.ring().assign(xNum[i], _lc.ring().zero);
-                }
-                _lc.ring().assign(xDen, _lc.ring().one);
-                return true;
-            }
-
-            commentator().start("[MultiModLifting] Lifting");
-
-            // Temporary structure to store a ci for each pj
-            std::vector<FVector> digits;
-            digits.reserve(_lc.primesCount());
-            for (auto& F : _lc.primesFields()) {
-                digits.emplace_back(F, _lc.size());
-            }
-
-            // The pj^i for each pj
-            std::vector<IElement> radices(_lc.primesCount(), 1);
-
-            // Stores each c0 + c1 pj + ... + ck pj^k for each pj
-            std::vector<IVector> padicAccumulations(_lc.primesCount(), _lc.ring());
-            for (auto j = 0u; j < _lc.primesCount(); ++j) {
-                padicAccumulations[j].resize(_lc.size());
-            }
-
-            // @fixme Better use PolEval (or will it cause memory explosion?)
-            VectorDomain<Ring> IVD(_lc.ring());
-            for (auto i = 0u; i < _lc.length(); ++i) {
-                _lc.next(digits);
-
-#pragma omp parallel for
-                for (auto j = 0u; j < _lc.primesCount(); ++j) {
-                    // @fixme @cpernet digits being a field vector, this will implicitly cast
-                    // each of its elements to a Integer, is there something better?
-                    // Or else, we just need an overload of Givaro::ZRing().axpyin() with a double as last parameter
-                    IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci
-                    _lc.ring().mulin(radices[j], _lc.prime(j));
-                }
-            }
-            commentator().stop("[MultiModLifting] Lifting");
-
-            // CRT reconstruction from paddicAccumulations
-            commentator().start("[MultiModLifting] CRT Reconstruction Progress");
-            using CRAField = Givaro::Modular<Integer>;
-            RationalCRABuilderFullMultip<CRAField> craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2)
-
-            {
-                CRAField field(radices[0]);
-                craBuilder.initialize(field, padicAccumulations[0]);
-            }
-
-            for (auto j = 1u; j < _lc.primesCount(); ++j) {
-                CRAField field(radices[j]);
-                craBuilder.progress(field, padicAccumulations[j]);
-            }
-            commentator().stop("[MultiModLifting] CRT Reconstruction Progress");
-
-            // Rational reconstruction
-            craBuilder.result(xNum, xDen, _lc.numBound());
-
-            return true;
-        }
-
-    private:
-        LiftingContainer& _lc;
-    };
-
-    // @fixme Move that to a file - and make it be a RationalSolver<Method::DixonRNS>
     template <class Field, class Ring, class PrimeGenerator>
     class DixonRNSSolver {
     public:
@@ -172,9 +80,13 @@ namespace LinBox {
         using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
         PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
 
+        // @fixme TO BE REMOVED
         DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
         solver.solve(xNum, xDen, A, b, m);
 
+        DixonSolver<Ring, Field, PrimeGenerator, Method::DenseElimination> classicSolver(A.field(), primeGenerator);
+        classicSolver.solveNonsingular(xNum, xDen, A, b, false, m.trialsBeforeFailure);
+
         commentator().stop("solve.dixon-rns.integer.dense");
 
         // @fixme Implement something like that
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index d9f825b26..f12668faa 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -329,7 +329,7 @@ int main(int argc, char** argv)
         // seed, verbose);
 
         // ----- Rational DixonRNS
-        ok = ok && test_dense_solve(Method::DixonRNS(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 
         // // ----- Modular Auto
         // ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);

From 0a68ceb0a14a0232307eda70a742f9f98de78997 Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Fri, 16 Aug 2019 13:53:06 +0200
Subject: [PATCH 52/63] 'Fixed' threading bug

---
 .../algorithms/multi-mod-lifting-container.h  | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index a7b1b12fc..101b2881f 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -99,9 +99,9 @@ namespace LinBox {
         {
             linbox_check(A.rowdim() == A.coldim());
 
-            std::cout << "----------" << std::endl;
-            A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
-            std::cout << "b: " << b << std::endl;
+            // std::cout << "----------" << std::endl;
+            // A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl;
+            // std::cout << "b: " << b << std::endl;
 
             // This will contain the primes or our MultiMod basis
             _primesCount = m.primesCount;
@@ -124,7 +124,7 @@ namespace LinBox {
                 double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n));
                 _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1));
                 _rnsPrimes.resize(_rnsPrimesCount);
-                std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
+                // std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl;
 
                 auto trialsLeft = m.trialsBeforeFailure;
                 std::vector<double> primes;
@@ -147,16 +147,23 @@ namespace LinBox {
                     }
 
                     // Inserting the primes at the right place to keep the array sorted
-                    std::cout << "Adding " << Integer(p) << std::endl;
                     primes.insert(lb, p);
                 }
 
                 // We take the smallest primes for our MultiMod basis
                 std::copy(primes.begin(), primes.begin() + _primesCount, _primes.begin());
 
+                // for (auto i = 0u; i < _primes.size(); ++i) {
+                //     std::cout << "p" << i << " = " << Integer(_primes[i]) << std::endl;
+                // }
+
                 // And the others for our RNS basis
                 std::copy(primes.begin() + _primesCount, primes.end(), _rnsPrimes.begin());
 
+                // for (auto i = 0u; i < _rnsPrimes.size(); ++i) {
+                //     std::cout << "q" << i << " = " << Integer(_rnsPrimes[i]) << std::endl;
+                // }
+
                 // We check that we really need all the primes within the RNS basis,
                 // as the first count was just an upper estimation.
                 double bitSize = 0.0;
@@ -174,7 +181,6 @@ namespace LinBox {
             // Setting fields up
             for (auto& pj : _primes) {
                 _fields.emplace_back(pj);
-                std::cout << Integer(pj) << std::endl;
             }
 
             // Initialize all inverses
@@ -257,7 +263,7 @@ namespace LinBox {
 
                 // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...)
                 _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct);
-                std::cout << "_iterationsCount " << _iterationsCount << std::endl;
+                // std::cout << "_iterationsCount " << _iterationsCount << std::endl;
             }
 
             //----- Locals setup
@@ -325,10 +331,13 @@ namespace LinBox {
             VectorDomain<Ring> IVD(_ring);
             BlasMatrixDomain<Ring> IMD(_ring);
 
-// commentator().start("[MultiModLifting] c = A^{-1} r mod p");
+            // commentator().start("[MultiModLifting] c = A^{-1} r mod p");
             PAR_BLOCK
             {
-                auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
+                // @fixme @zhuh Can't get that working with NUM_THREADS,
+                // any idea what makes it wrong?
+                // ./test-solve-full -n 1 -m 1 -b 50 -v -l
+                auto sp = SPLITTER(1 /* NUM_THREADS */, FFLAS::CuttingStrategy::Row,
                                    FFLAS::StrategyParameter::Threads);
                 int M = _primesCount;
                 FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), {
@@ -403,7 +412,7 @@ namespace LinBox {
                     auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
                                        FFLAS::StrategyParameter::Threads);
                     int M = _primesCount;
-                    FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), {
+                    FOR1D(j, M, sp, {
                         auto& rnsPrimeInverse = _rnsPrimesInverses[j];
                         auto stridePrimeInverse = rnsPrimeInverse._stride;
                         auto rnsPrimeInverseForRnsPrimeH =

From 2f70e969fabaf1187488b01f151d7d3f20521e26 Mon Sep 17 00:00:00 2001
From: "A. Breust" <alexis.breust@gmail.com>
Date: Fri, 16 Aug 2019 15:15:06 +0200
Subject: [PATCH 53/63] Merged DixonRNS within Dixon

---
 benchmarks/benchmark-dense-solve.C            | 14 ++-
 .../dixon-solver/dixon-solver-dense.h         |  4 +-
 .../dixon-solver/dixon-solver-dense.inl       | 88 +++++++++--------
 .../algorithms/multi-mod-lifting-container.h  |  6 +-
 linbox/solutions/methods.h                    | 14 ++-
 linbox/solutions/solve.h                      |  6 --
 linbox/solutions/solve/solve-dixon-rns.h      | 99 -------------------
 linbox/solutions/solve/solve-dixon.h          |  2 +-
 tests/test-solve-full.C                       |  9 +-
 9 files changed, 75 insertions(+), 167 deletions(-)
 delete mode 100644 linbox/solutions/solve/solve-dixon-rns.h

diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C
index 63faf090b..19fb5505d 100644
--- a/benchmarks/benchmark-dense-solve.C
+++ b/benchmarks/benchmark-dense-solve.C
@@ -54,7 +54,7 @@ namespace {
         int n = 500;
         int bits = 10;
         int seed = -1;
-        int primesCount = 8;
+        int primesCount = -1;
         std::string dispatchString = "Auto";
         std::string methodString = "Auto";
         std::string rnsFgemmString = "ParallelRnsOnly";
@@ -117,7 +117,6 @@ void benchmark(std::array<double, 3>& timebits, Arguments& args, MethodBase& met
     else if (args.methodString == "DenseElimination")       solve(X, A, B, Method::DenseElimination(method));
     else if (args.methodString == "SparseElimination")      solve(X, A, B, Method::SparseElimination(method));
     else if (args.methodString == "Dixon")                  solve(X, A, B, Method::Dixon(method));
-    else if (args.methodString == "DixonRNS")               solve(X, A, B, Method::DixonRNS(method));
     else if (args.methodString == "CRA")                    solve(X, A, B, Method::CRAAuto(method));
     else if (args.methodString == "SymbolicNumericOverlap") solve(X, A, B, Method::SymbolicNumericOverlap(method));
     else if (args.methodString == "SymbolicNumericNorm")    solve(X, A, B, Method::SymbolicNumericNorm(method));
@@ -149,11 +148,11 @@ int main(int argc, char** argv)
                      {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed},
                      {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString},
                      {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString},
-                     {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &args.primesCount},
+                     {'p', "-p", "Enable multi-modular method, and tells how many primes to use.", TYPE_INT, &args.primesCount},
 		             {'t', "-t", "Number of threads.", TYPE_INT, &numThreads },
                      {'M', "-M",
                       "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, "
-                      "Dixon, DixonRNS, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, "
+                      "Dixon, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, "
                       "Blackbox, Wiedemann, Lanczos).",
                       TYPE_STR, &args.methodString},
                      END_OF_ARGUMENTS};
@@ -176,7 +175,12 @@ int main(int argc, char** argv)
 
     MethodBase method;
     method.pCommunicator = &communicator;
-    method.primesCount = args.primesCount;
+    if (args.primesCount > 0) {
+        method.multiModularLifting = true;
+        method.primesCount = args.primesCount;
+    } else {
+        method.multiModularLifting = false;
+    }
     if (args.dispatchString == "Sequential")        method.dispatch = Dispatch::Sequential;
     else if (args.dispatchString == "SMP")          method.dispatch = Dispatch::SMP;
     else if (args.dispatchString == "Distributed")  method.dispatch = Dispatch::Distributed;
diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.h b/linbox/algorithms/dixon-solver/dixon-solver-dense.h
index 0d378e69f..6f0357403 100644
--- a/linbox/algorithms/dixon-solver/dixon-solver-dense.h
+++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.h
@@ -89,6 +89,7 @@ namespace LinBox {
         mutable Prime _prime;
         Ring _ring;
         Field _field;
+        Method::Dixon _method;
 
         BlasMatrixDomain<Field> _bmdf;
 
@@ -113,10 +114,11 @@ namespace LinBox {
          * @param r   a Ring, set by default
          * @param rp  a RandomPrime generator, set by default
          */
-        DixonSolver(const Ring& r = Ring(), const RandomPrime& rp = RandomPrime())
+        DixonSolver(const Ring& r = Ring(), const RandomPrime& rp = RandomPrime(), const Method::Dixon& method = Method::Dixon())
             : lastCertificate(r, 0)
             , _genprime(rp)
             , _ring(r)
+            , _method(method)
         {
             _genprime.setBits(FieldTraits<Field>::bestBitSize());
             _prime = *_genprime;
diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
index c6a2a81b3..4935548f2 100644
--- a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
+++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl
@@ -130,26 +130,23 @@ namespace LinBox {
             }
         } while (notfr);
 
-        // commentator().start("CLASSIC DIXON LIFTING");
-        // typedef DixonLiftingContainer<Ring, Field, IMatrix, BlasMatrix<Field>> LiftingContainer;
-        // LiftingContainer lc(_ring, *F, A, *FMP, b, _prime);
-        // RationalReconstruction<LiftingContainer> re(lc);
-        // if (!re.getRational(num, den, 0)) {
-        //     delete FMP;
-        //     return SS_FAILED;
-        // }
-        // commentator().stop("CLASSIC DIXON LIFTING");
-
-        commentator().start("MULTI MOD DIXON LIFTING");
-        using LiftingContainer = MultiModLiftingContainer<Field, Ring, RandomPrime>;
-        Method::DixonRNS m; // @fixme Get from?
-        LiftingContainer lc(_ring, _genprime, A, b, m);
-        MultiModRationalReconstruction<LiftingContainer> re(lc);
-        if (!re.getRational(num, den)) {
-            delete FMP;
-            return SS_FAILED;
+        if (_method.multiModularLifting) {
+            using LiftingContainer = MultiModLiftingContainer<Field, Ring, RandomPrime>;
+            LiftingContainer lc(_ring, _genprime, A, b, _method);
+            MultiModRationalReconstruction<LiftingContainer> re(lc);
+            if (!re.getRational(num, den)) {
+                delete FMP;
+                return SS_FAILED;
+            }
+        } else {
+            using LiftingContainer = DixonLiftingContainer<Ring, Field, IMatrix, BlasMatrix<Field>>;
+            LiftingContainer lc(_ring, *F, A, *FMP, b, _prime);
+            RationalReconstruction<LiftingContainer> re(lc);
+            if (!re.getRational(num, den, 0)) {
+                delete FMP;
+                return SS_FAILED;
+            }
         }
-        commentator().stop("MULTI MOD DIXON LIFTING");
 
 #ifdef RSTIMING
         ttNonsingularSolve.update(re, lc);
@@ -287,8 +284,6 @@ namespace LinBox {
     SolverReturnStatus DixonSolver<Ring, Field, RandomPrime, Method::DenseElimination>::solveApparentlyInconsistent(
         const BlasMatrix<Ring>& A, TAS& tas, BlasMatrix<Field>* Atp_minor_inv, size_t rank, const MethodBase& method)
     {
-        using LiftingContainer = DixonLiftingContainer<Ring, Field, BlasMatrix<Ring>, BlasMatrix<Field>>;
-
         if (!method.certifyInconsistency) return SS_INCONSISTENT;
 
         // @fixme Put these as class members!
@@ -311,15 +306,24 @@ namespace LinBox {
         ttCheckConsistency += tCheckConsistency;
 #endif
 
-        LiftingContainer lc(_ring, _field, At_minor, *Atp_minor_inv, zt, _prime);
-        RationalReconstruction<LiftingContainer> re(lc);
-
         BlasVector<Ring> shortNum(A.field(), rank);
         Integer shortDen;
 
-        // Dirty, but should not be called under normal circumstances
-        if (!re.getRational(shortNum, shortDen, 0)) {
-            return SS_FAILED;
+        if (_method.multiModularLifting) {
+            using LiftingContainer = MultiModLiftingContainer<Field, Ring, RandomPrime>;
+            LiftingContainer lc(_ring, _genprime, At_minor, zt, _method);
+            MultiModRationalReconstruction<LiftingContainer> re(lc);
+            if (!re.getRational(shortNum, shortDen)) {
+                return SS_FAILED;
+            }
+        }
+        else {
+            using LiftingContainer = DixonLiftingContainer<Ring, Field, BlasMatrix<Ring>, BlasMatrix<Field>>;
+            LiftingContainer lc(_ring, _field, At_minor, *Atp_minor_inv, zt, _prime);
+            RationalReconstruction<LiftingContainer> re(lc);
+            if (!re.getRational(shortNum, shortDen, 0)) {
+                return SS_FAILED;
+            }
         }
 
 #ifdef RSTIMING
@@ -597,8 +601,6 @@ namespace LinBox {
     SolverReturnStatus DixonSolver<Ring, Field, RandomPrime, Method::DenseElimination>::monolithicSolve(
         Vector1& num, Integer& den, const IMatrix& A, const Vector2& b, Method::Dixon method)
     {
-        using LiftingContainer = DixonLiftingContainer<Ring, Field, BlasMatrix<Ring>, BlasMatrix<Field>>;
-
         if (method.certifyMinimalDenominator && !method.certifyInconsistency) {
             method.certifyInconsistency = true;
             std::cerr << "WARNING: forcing certifyInconsistency due to certifyMinimalDenominator" << std::endl;
@@ -716,21 +718,27 @@ namespace LinBox {
             BMDI.mulin_right(tas.Q, newb);
             newb.resize(rank);
 
-            // ----- Do lifting on sub matrix
+            // ----- Do lifting on sub matrix and reconstruct
 
             BlasMatrix<Ring> BBA_minor(A_minor);
-            commentator().start("CLASSIC DIXON LIFTING");
-            LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime);
-
-            // ----- Reconstruct rational
-
-            RationalReconstruction<LiftingContainer> re(lc);
             VectorFraction<Ring> resultVF(_ring, rank);
-            if (!re.getRational(resultVF.numer, resultVF.denom, 0)) {
-                // dirty, but should not be called
-                return SS_FAILED;
+
+            if (_method.multiModularLifting) {
+                using LiftingContainer = MultiModLiftingContainer<Field, Ring, RandomPrime>;
+                LiftingContainer lc(_ring, _genprime, BBA_minor, newb, _method);
+                MultiModRationalReconstruction<LiftingContainer> re(lc);
+                if (!re.getRational(resultVF.numer, resultVF.denom)) {
+                    return SS_FAILED;
+                }
+            }
+            else {
+                using LiftingContainer = DixonLiftingContainer<Ring, Field, BlasMatrix<Ring>, BlasMatrix<Field>>;
+                LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime);
+                RationalReconstruction<LiftingContainer> re(lc);
+                if (!re.getRational(resultVF.numer, resultVF.denom, 0)) {
+                    return SS_FAILED;
+                }
             }
-            commentator().stop("CLASSIC DIXON LIFTING");
 
 #ifdef RSTIMING
             ttSystemSolve.update(re, lc);
diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 101b2881f..71e0a0008 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -57,7 +57,7 @@ namespace LinBox {
      *      (iii)   y = CRT_Reconstruct(y1, ..., yl)
      *      (iv)    x = Rational_Reconstruct(y)
      *
-     * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`.
+     * One can configure how many primes are used with `Method::Dixon.primesCount`.
      * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2
      * can be used, but it depends on the problem, really.
      */
@@ -88,7 +88,7 @@ namespace LinBox {
 
         // @fixme Split to inline file
         MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A,
-                                 const IVector& b, const Method::DixonRNS& m)
+                                 const IVector& b, const Method::Dixon& m)
             : _ring(ring)
             , _method(m)
             , _A(A)
@@ -473,7 +473,7 @@ namespace LinBox {
 
     public: // @fixme BACK TO PRIVATE!
         const Ring& _ring;
-        Method::DixonRNS _method; // A copy of the user-provided method.
+        Method::Dixon _method; // A copy of the user-provided method.
 
         // The problem: A^{-1} * b
         const IMatrix& _A;
diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h
index 63b7af584..063a44dbf 100644
--- a/linbox/solutions/methods.h
+++ b/linbox/solutions/methods.h
@@ -232,9 +232,12 @@ namespace LinBox {
         SingularSolutionType singularSolutionType = SingularSolutionType::Random;
         bool certifyMinimalDenominator = false; //!< Whether the solver should try to find a certificate
                                                 //!  that the provided denominator is minimal.
-
-        // ----- For DixonRNS method.
-        //! How many primes to use lifting will be done over p = p1p2...pl.
+        // @fixme Make an automatic switch for multi-modular lifting, based on matrix size.
+        // Whether to use the multi-modular Dixon lifter.
+        // (A BLAS Based C Library for Exact Linear Algebra on Integer Matrices - Chen, Storjohann ISSAC 2005)
+        // https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf
+        bool multiModularLifting = true;
+        //! How many primes to use, multi mod lifting will be done over p = p1p2...pl.
         //! -1 means automatically set to a heuristic value.
         uint32_t primesCount = -1u;
         RnsFgemmType rnsFgemmType = RnsFgemmType::ParallelRnsOnly;
@@ -284,11 +287,6 @@ namespace LinBox {
         // (Numerische Mathematik - Dixon 1982)
         DEFINE_METHOD(Dixon, RingCategories::IntegerTag);
 
-        // Method::DixonRNS uses RNS features over Dixon's p-adic lifting.
-        // (A BLAS Based C Library for Exact Linear Algebra on Integer Matrices - Chen, Storjohann ISSAC 2005)
-        // https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf
-        DEFINE_METHOD(DixonRNS, RingCategories::IntegerTag);
-
         // Method::ChineseRemainder uses the chinese remainder algorithm
         // to solve the problem on multiple modular domains,
         // and finally reconstruct the solution.
diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h
index 2a0e8f1dc..e827167c6 100644
--- a/linbox/solutions/solve.h
+++ b/linbox/solutions/solve.h
@@ -84,11 +84,6 @@ namespace LinBox {
      *      |   - SparseMatrix  > `DixonSolver<..., Method::SparseElimination>`
      *      |   - Otherwise     >  Error
      *      - Otherwise > Error
-     * - Method::DixonRNS
-     *      - IntegerTag
-     *      |   - DenseMatrix   > `DixonRNSSolver`
-     *      |   - Otherwise     >  Error
-     *      - Otherwise > Error
      * - Method::Blackbox > Method::Wiedemann
      * - Method::Wiedemann
      *      - ModularTag > `WiedemannSolver`
@@ -342,7 +337,6 @@ namespace LinBox {
 // Integer-based
 #include "./solve/solve-cra.h"
 #include "./solve/solve-dixon.h"
-#include "./solve/solve-dixon-rns.h"
 #include "./solve/solve-numeric-symbolic.h"
 
 // Blackbox
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h
deleted file mode 100644
index 89dfb5cbd..000000000
--- a/linbox/solutions/solve/solve-dixon-rns.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright(C) LinBox
- *
- * ========LICENCE========
- * This file is part of the library LinBox.
- *
- * LinBox is free software: you can redistribute it and/or modify
- * it under the terms of the  GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * ========LICENCE========
- */
-
-#pragma once
-
-#include <linbox/algorithms/multi-mod-lifting-container.h>
-#include <linbox/algorithms/multi-mod-rational-reconstruction.h>
-
-namespace LinBox {
-    template <class Field, class Ring, class PrimeGenerator>
-    class DixonRNSSolver {
-    public:
-        DixonRNSSolver(const Ring& ring, PrimeGenerator& primeGenerator)
-            : _ring(ring)
-            , _primeGenerator(primeGenerator)
-        {
-            /* @todo */
-        }
-
-        /**
-         * Dense solving.
-         */
-        template <class RVector, class Vector>
-        void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
-                   const Method::DixonRNS& m)
-        {
-            // @fixme We should use some code from DixonSolver...
-            // But that's hard so we just assume that A is square and invertible.
-            linbox_check(A.rowdim() == A.coldim());
-
-            using LiftingContainer = MultiModLiftingContainer<Field, Ring, PrimeGenerator>;
-
-            commentator().start("[MultiModLifting] Init");
-            LiftingContainer lc(_ring, _primeGenerator, A, b, m);
-            MultiModRationalReconstruction<LiftingContainer> re(lc);
-            commentator().stop("[MultiModLifting] Init");
-
-            if (!re.getRational(xNum, xDen)) {
-                std::cerr << "OUCH!" << std::endl;
-            }
-        }
-
-    private:
-        const Ring& _ring;
-        PrimeGenerator& _primeGenerator;
-    };
-
-    /**
-     * \brief Solve specialisation for DixonRNS on dense matrices.
-     */
-    template <class RVector, class Ring, class Vector>
-    void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix<Ring>& A, const Vector& b,
-               const RingCategories::IntegerTag& tag, const Method::DixonRNS& m)
-    {
-        commentator().start("solve.dixon-rns.integer.dense");
-
-        // @fixme We don't know if we can use ModularBalanced<double>,
-        // because of the rational reconstruction which might be
-        // implicitly requiring 0-{p-1} representation of the p-adic sequence elements.
-        using Field = Givaro::Modular<double>;
-        using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;
-        PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
-
-        // @fixme TO BE REMOVED
-        DixonRNSSolver<Field, Ring, PrimeGenerator> solver(A.field(), primeGenerator);
-        solver.solve(xNum, xDen, A, b, m);
-
-        DixonSolver<Ring, Field, PrimeGenerator, Method::DenseElimination> classicSolver(A.field(), primeGenerator);
-        classicSolver.solveNonsingular(xNum, xDen, A, b, false, m.trialsBeforeFailure);
-
-        commentator().stop("solve.dixon-rns.integer.dense");
-
-        // @fixme Implement something like that
-        // if (status == SS_INCONSISTENT) {
-        //     throw LinboxMathInconsistentSystem("From DixonRNS method.");
-        // } else if (status == SS_FAILED || status == SS_BAD_PRECONDITIONER) {
-        //     throw LinboxError("From DixonRNS method.");
-        // }
-    }
-}
\ No newline at end of file
diff --git a/linbox/solutions/solve/solve-dixon.h b/linbox/solutions/solve/solve-dixon.h
index 3afddc0ca..4a988673e 100644
--- a/linbox/solutions/solve/solve-dixon.h
+++ b/linbox/solutions/solve/solve-dixon.h
@@ -96,7 +96,7 @@ namespace LinBox {
         PrimeGenerator primeGenerator(FieldTraits<Field>::bestBitSize(A.coldim()));
 
         using Solver = DixonSolver<Ring, Field, PrimeGenerator, typename MethodForMatrix<Matrix>::type>;
-        Solver dixonSolve(A.field(), primeGenerator);
+        Solver dixonSolve(A.field(), primeGenerator, m);
 
         // Either A is known to be non-singular, or we just don't know yet.
         int maxTrials = m.trialsBeforeFailure;
diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C
index f12668faa..e278bdb80 100644
--- a/tests/test-solve-full.C
+++ b/tests/test-solve-full.C
@@ -272,8 +272,12 @@ int main(int argc, char** argv)
     }
 
     if (primesCount > 0) {
+        method.multiModularLifting = true;
         method.primesCount = primesCount;
     }
+    else {
+        method.multiModularLifting = false;
+    }
 
     if (vectorBitSize < 0) {
         vectorBitSize = bitSize;
@@ -314,7 +318,7 @@ int main(int argc, char** argv)
         // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 
         // // ----- Rational Dixon
-        // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+        ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
         // // @fixme Dixon<Wiedemann> does not compile
         // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
@@ -328,9 +332,6 @@ int main(int argc, char** argv)
         // ok = ok && test_sparse_solve(Method::SymbolicNumericNorm(method), ZZ, QQ, m, n, bitSize, vectorBitSize,
         // seed, verbose);
 
-        // ----- Rational DixonRNS
-        ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
-
         // // ----- Modular Auto
         // ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);
         // ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose);

From 0dc46e1dc82a7458f18634e33f77b4a5992522cf Mon Sep 17 00:00:00 2001
From: ZHG <ZHG2011823@hotmail.com>
Date: Mon, 19 Aug 2019 09:50:10 +0200
Subject: [PATCH 54/63] Fixed library compilation

---
 linbox/algorithms/multi-mod-rational-reconstruction.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/linbox/algorithms/multi-mod-rational-reconstruction.h b/linbox/algorithms/multi-mod-rational-reconstruction.h
index 6fae6a357..240042b97 100644
--- a/linbox/algorithms/multi-mod-rational-reconstruction.h
+++ b/linbox/algorithms/multi-mod-rational-reconstruction.h
@@ -22,11 +22,13 @@
 
 #pragma once
 
+#include "./rational-cra-builder-full-multip.h"
+
 namespace LinBox {
     /**
      * From a MultiModLiftingContainer, will build
      * the solution on each prime, then will do a CRT reconstruction,
-     * before reconstructing the rational.
+     * before reconstructing the rational.
      *
      * This does not do early termination.
      */

From cc7a0a50df55f5a172112ae7550f45986186da08 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Tue, 20 Aug 2019 10:43:16 +0200
Subject: [PATCH 55/63] Quick adjustment for THE BUG

---
 linbox/algorithms/multi-mod-lifting-container.h | 4 +---
 linbox/solutions/solve/solve-dixon.h            | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 71e0a0008..d41eb0e2a 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -109,8 +109,6 @@ namespace LinBox {
                 PAR_BLOCK { _primesCount = 6 * NUM_THREADS; }
             }
 
-            // @fixme !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-            _primesCount = 2;
             _primes.resize(_primesCount);
 
             // Some preparation work
@@ -337,7 +335,7 @@ namespace LinBox {
                 // @fixme @zhuh Can't get that working with NUM_THREADS,
                 // any idea what makes it wrong?
                 // ./test-solve-full -n 1 -m 1 -b 50 -v -l
-                auto sp = SPLITTER(1 /* NUM_THREADS */, FFLAS::CuttingStrategy::Row,
+                auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
                                    FFLAS::StrategyParameter::Threads);
                 int M = _primesCount;
                 FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), {
diff --git a/linbox/solutions/solve/solve-dixon.h b/linbox/solutions/solve/solve-dixon.h
index 4a988673e..9f29fd3b5 100644
--- a/linbox/solutions/solve/solve-dixon.h
+++ b/linbox/solutions/solve/solve-dixon.h
@@ -89,7 +89,7 @@ namespace LinBox {
         commentator().start("solve.dixon.integer.dense");
         linbox_check((A.coldim() == xNum.size()) && (A.rowdim() == b.size()));
 
-        // @fixme Using Givaro::ModularBalanced<double> for the field makes Dixon fail...
+        // @note Using Givaro::ModularBalanced<double> would make Dixon and MultiModLiftingContainer fail...
         using Matrix = DenseMatrix<Ring>;
         using Field = Givaro::Modular<double>;
         using PrimeGenerator = PrimeIterator<IteratorCategories::HeuristicTag>;

From cd928254f935112106be2a9558b3e06de2594bb7 Mon Sep 17 00:00:00 2001
From: Jean-Guillaume Dumas <Jean-Guillaume.Dumas@imag.fr>
Date: Wed, 28 Aug 2019 10:35:55 +0200
Subject: [PATCH 56/63] Now using DenseVector

---
 linbox/algorithms/last-invariant-factor.h | 42 +++++++++--------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/linbox/algorithms/last-invariant-factor.h b/linbox/algorithms/last-invariant-factor.h
index 80aa67880..b3cbd9b3c 100644
--- a/linbox/algorithms/last-invariant-factor.h
+++ b/linbox/algorithms/last-invariant-factor.h
@@ -51,7 +51,7 @@ namespace LinBox
 
 	protected:
 
-		typedef BlasVector<Ring>         DVect;
+		typedef DenseVector<Ring>         DVect;
 		Ring        r;
         mutable typename Ring::RandIter _gen;
 		Solver solver;
@@ -110,20 +110,16 @@ namespace LinBox
 			Integer r_den;
 			//std::vector<std::pair<Integer, Integer> > result (A.coldim());
 			//typename std::vector<std::pair<Integer, Integer> >::iterator result_p;
-			// vector b, RHS, 32-bit int is good enough
-			std::vector<int> b(A.rowdim());
-			typename std::vector<int>::iterator b_p;
-			typename Vector::const_iterator Prime_p;
+			DenseVector<Ring> b(r, A.rowdim());
 
 			Integer pri, quo, rem, itmp;
 
 			for (; count < threshold; ++ count) {
 				// assign b to be a random vector
-				for (b_p = b.begin(); b_p != b.end(); ++ b_p) {
-//					* b_p = rand() % 268435456 - 134217728; // may need to change to use ring's random gen.
-//					// dpritcha, 2004-07-26
-                    _gen( itmp );
-                    * b_p = (int)itmp;
+				for (auto b_p = b.begin(); b_p != b.end(); ++ b_p) {
+                   _gen( itmp );
+                        //@enhancement vector b, RHS, 32-bit is good enough
+                   * b_p = Integer((int32_t)itmp);
 				}
 
 				// try to solve Ax = b over Ring
@@ -138,7 +134,7 @@ namespace LinBox
 			}
 			// filter out primes in PRIMEL from lif.
 			if (!r. isZero (lif))
-				for ( Prime_p = PrimeL.begin();
+				for ( auto Prime_p = PrimeL.begin();
 				      Prime_p != PrimeL.end();
 				      ++ Prime_p) {
 					r.init (pri, *Prime_p);
@@ -172,21 +168,17 @@ namespace LinBox
 			Integer r1_den, r2_den;
 			//std::vector<std::pair<Integer, Integer> > result (A.coldim());
 			//typename std::vector<std::pair<Integer, Integer> >::iterator result_p;
-			// vector b, RHS, 32-bit int is good enough
-			std::vector<int> b1(A. rowdim()), b2(A. rowdim());
-			typename std::vector<int>::iterator b_p;
-			typename Vector::const_iterator Prime_p;
+			//@enhancement vector b, RHS, 32-bit instead would be good enough
+            DenseVector<Ring> b1(r, A. rowdim()), b2(r, A. rowdim());
 			Integer pri, quo, rem;
 
 			for (; count < (threshold + 1) / 2; ++ count) {
 				// assign b to be a random vector
-				for (b_p = b1. begin(); b_p != b1. end(); ++ b_p) {
-//					* b_p = rand();
-                                        *b_p = _gen.random();//(* b_p);
+				for (auto b_p = b1. begin(); b_p != b1. end(); ++ b_p) {
+                    _gen.random(*b_p);
 				}
-				for (b_p = b2. begin(); b_p != b2. end(); ++ b_p) {
-//					* b_p = rand();
-                                        *b_p = _gen.random();//(* b_p);
+				for (auto b_p = b2. begin(); b_p != b2. end(); ++ b_p) {
+                    _gen.random(*b_p);
 				}
 				// try to solve Ax = b1, b2 over Ring
 				tmp1 = solver. solveNonsingular(r1_num, r1_den, A, b1);
@@ -243,7 +235,7 @@ namespace LinBox
 
 			// filter out primes in PRIMEL from lif.
 			if (!r. isZero (lif))
-				for ( Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) {
+				for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) {
 					r.init (pri, *Prime_p);
 					do {
 						r.quoRem(quo,rem,lif,pri);
@@ -253,7 +245,7 @@ namespace LinBox
 				}
 			r. gcdin (Bonus, lif);
 			if (!r. isZero (Bonus))
-				for ( Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) {
+				for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) {
 					r.init (pri, *Prime_p);
 					do {
 						r.quoRem(quo,rem,Bonus,pri);
@@ -275,13 +267,11 @@ namespace LinBox
 			if (r_num.size()!=A. coldim()) return lif=0;
 			Integer r_den;
 			DVect b(r,A.rowdim());
-			typename DVect::iterator b_p;
-			//typename Vector::const_iterator Prime_p;
 
 			Integer pri, quo, rem;
 
 			// assign b to be a random vector
-			for (b_p = b.begin(); b_p != b.end(); ++ b_p) {
+			for (auto b_p = b.begin(); b_p != b.end(); ++ b_p) {
 //				* b_p = rand() % 268435456 - 134217728; // may need to change to use ring's random gen.
 //				// dpritcha, 2004-07-26
                 _gen( * b_p );

From 7a56154ee2dfe374d0ae153425089e6b06d460a0 Mon Sep 17 00:00:00 2001
From: Jean-Guillaume Dumas <Jean-Guillaume.Dumas@imag.fr>
Date: Wed, 28 Aug 2019 11:29:51 +0200
Subject: [PATCH 57/63] sequential parseq must have only one thread

---
 linbox/algorithms/multi-mod-lifting-container.h | 17 ++++++++++-------
 .../multi-mod-rational-reconstruction.h         |  1 +
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index d41eb0e2a..c97391e84 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -314,7 +314,7 @@ namespace LinBox {
         Integer denBound() const { return _denBound; }
 
         uint32_t primesCount() const { return _primesCount; }
-        const FElement& prime(uint32_t index) const { return _primes.at(index); }
+        FElement prime(uint32_t index) const { return _primes.at(index); }
         const std::vector<Field>& primesFields() const { return _fields; }
 
         // --------------
@@ -328,10 +328,13 @@ namespace LinBox {
         {
             VectorDomain<Ring> IVD(_ring);
             BlasMatrixDomain<Ring> IMD(_ring);
+            size_t numthreads;
 
             // commentator().start("[MultiModLifting] c = A^{-1} r mod p");
             PAR_BLOCK
             {
+                numthreads = NUM_THREADS;
+
                 // @fixme @zhuh Can't get that working with NUM_THREADS,
                 // any idea what makes it wrong?
                 // ./test-solve-full -n 1 -m 1 -b 50 -v -l
@@ -383,16 +386,16 @@ namespace LinBox {
             // commentator().start("[MultiModLifting] FGEMM R <= R - Ac");
             // Firstly compute R <= R - A c as a fgemm within the RNS domain.
             if (_method.rnsFgemmType == RnsFgemmType::BothSequential) {
-                rns_fgemm<FFLAS::ParSeqHelper::Sequential, FFLAS::ParSeqHelper::Sequential>();
+                rns_fgemm<FFLAS::ParSeqHelper::Sequential, FFLAS::ParSeqHelper::Sequential>(1,1);
             }
             else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) {
-                rns_fgemm<RNSParallel, FGEMMParallel>();
+                rns_fgemm<RNSParallel, FGEMMParallel>(numthreads,numthreads);
             }
             else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) {
-                rns_fgemm<FFLAS::ParSeqHelper::Sequential, FGEMMParallel>();
+                rns_fgemm<FFLAS::ParSeqHelper::Sequential, FGEMMParallel>(1,numthreads);
             }
             else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) {
-                rns_fgemm<RNSParallel, FFLAS::ParSeqHelper::Sequential>();
+                rns_fgemm<RNSParallel, FFLAS::ParSeqHelper::Sequential>(numthreads,1);
             }
             // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac");
 
@@ -452,7 +455,7 @@ namespace LinBox {
 
         // @note This allows us to factor out some of the rns fgemm variants common code.
         template <class RnsParSeq, class FgemmParSeq>
-        inline void rns_fgemm()
+        inline void rns_fgemm(size_t threads1, size_t threads2)
         {
             PAR_BLOCK
             {
@@ -460,7 +463,7 @@ namespace LinBox {
                 using MMHelper =
                     FFLAS::MMHelper<RNSDomain, FFLAS::MMHelperAlgo::Classic,
                                     FFLAS::ModeCategories::DefaultTag, ComposedParSeqHelper>;
-                ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS);
+                ComposedParSeqHelper composedParSeqHelper(threads1, threads2);
                 MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper);
 
                 FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n,
diff --git a/linbox/algorithms/multi-mod-rational-reconstruction.h b/linbox/algorithms/multi-mod-rational-reconstruction.h
index 240042b97..cd5018c2a 100644
--- a/linbox/algorithms/multi-mod-rational-reconstruction.h
+++ b/linbox/algorithms/multi-mod-rational-reconstruction.h
@@ -89,6 +89,7 @@ namespace LinBox {
                     _lc.ring().mulin(radices[j], _lc.prime(j));
                 }
             }
+
             commentator().stop("[MultiModLifting] Lifting");
 
             // CRT reconstruction from paddicAccumulations

From ab9ac916db54716bce9e09d298e558ce51b155b2 Mon Sep 17 00:00:00 2001
From: Jean-Guillaume Dumas <Jean-Guillaume.Dumas@imag.fr>
Date: Wed, 28 Aug 2019 16:53:47 +0200
Subject: [PATCH 58/63] indent

---
 .../algorithms/multi-mod-lifting-container.h  |  4 +-
 tests/test-last-invariant-factor.C            | 62 +++++++++----------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index c97391e84..55df94d73 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -181,6 +181,7 @@ namespace LinBox {
                 _fields.emplace_back(pj);
             }
 
+
             // Initialize all inverses
             // @note An inverse mod some p within DixonSolver<Dense> was already computed,
             // and pass through to the lifting container. Here, we could use that, but we have
@@ -199,12 +200,11 @@ namespace LinBox {
                     auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
                                        FFLAS::StrategyParameter::Threads);
                     int M = _primesCount;
-                    FOR1D(j, M, sp, MODE(CONSTREFERENCE(nullities)), {
+                    FOR1D(j, M, sp, MODE(WRITE(nullities)), {
                         auto& F = _fields[j];
                         BlasMatrixDomain<Field> bmd(F);
                         bmd.invin(_B[j], nullities[j]);
                     });
-
                     for (auto nullity : nullities) {
                         if (nullity > 0) {
                             // @fixme Should redraw another prime!
diff --git a/tests/test-last-invariant-factor.C b/tests/test-last-invariant-factor.C
index 8b270bc3e..d3a6e0fca 100644
--- a/tests/test-last-invariant-factor.C
+++ b/tests/test-last-invariant-factor.C
@@ -51,19 +51,19 @@ using namespace LinBox;
 
 template <class Ring, class LIF, class Vector>
 bool testRandom(const Ring& R,
-		LIF& lif,
-		LinBox::VectorStream<Vector>& stream1)
+                LIF& lif,
+                LinBox::VectorStream<Vector>& stream1)
 {
 
 	std::ostringstream str;
 
 	str << "Testing last invariant factor:";
 
-        commentator().start (str.str ().c_str (), "testRandom", stream1.m ());
+    commentator().start (str.str ().c_str (), "testRandom", stream1.m ());
 
-        bool ret = true;
+    bool ret = true;
 
-        VectorDomain<Ring> VD (R);
+    VectorDomain<Ring> VD (R);
 
 	Vector d(R);
 
@@ -73,19 +73,19 @@ bool testRandom(const Ring& R,
 
 	int n = int(d. size());
 
-	 while (stream1) {
+    while (stream1) {
 
-		 commentator().startIteration ((unsigned)stream1.j ());
+        commentator().startIteration ((unsigned)stream1.j ());
 
-		 std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
+        std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
 
-                bool iter_passed = true;
+        bool iter_passed = true;
 
 		stream1.next (d);
 
 		report << "Input vector:  ";
 		VD.write (report, d);
-                report << endl;
+        report << endl;
 
 		BlasMatrix<Ring> D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n);
 
@@ -117,8 +117,8 @@ bool testRandom(const Ring& R,
 			R.assign(e[(size_t)i],R.one);
 			U.apply(tmp1, e);
 			D.apply(tmp2, tmp1);
-			// LinBox::BlasSubvector<BlasVector<Ring> > col_p_v(R,*col_p);
-			// L.apply(col_p_v, tmp2);
+                // LinBox::BlasSubvector<BlasVector<Ring> > col_p_v(R,*col_p);
+                // L.apply(col_p_v, tmp2);
 			L.apply(*col_p, tmp2);
 			R.assign(e[(size_t)i],R.zero);
 		}
@@ -157,24 +157,24 @@ bool testRandom(const Ring& R,
 
 			ret = iter_passed = false;
 
-                if (!iter_passed)
+        if (!iter_passed)
 
-                        commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR)
+            commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR)
 				<< "ERROR: Computed last invariant factor is incorrect" << endl;
 
 
 
-                commentator().stop ("done");
+        commentator().stop ("done");
 
-                commentator().progress ();
+        commentator().progress ();
 
-	 }
+    }
 
-	 //stream1.reset ();
+        //stream1.reset ();
 
-	  commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom");
+    commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom");
 
-	  return ret;
+    return ret;
 
 }
 
@@ -182,30 +182,30 @@ int main(int argc, char** argv)
 {
 
 
-        bool pass = true;
+    bool pass = true;
 
-        static size_t n = 10;
+    static size_t n = 10;
 
 	static unsigned int iterations = 1;
 
-        static Argument args[] = {
-                { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT,     &n },
-                { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT,     &iterations },
+    static Argument args[] = {
+        { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT,     &n },
+        { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT,     &iterations },
 		END_OF_ARGUMENTS
 
-        };
+    };
 
 	parseArguments (argc, argv, args);
 
-        typedef Givaro::ZRing<Integer>      Ring;
+    typedef Givaro::ZRing<Integer>      Ring;
 
-        Ring R; Ring::RandIter gen(R);
+    Ring R; Ring::RandIter gen(R);
 
 	commentator().start("Last invariant factor test suite", "LIF");
 
-        commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5);
+    commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5);
 
-        RandomDenseStream<Ring> s1 (R, gen, n, iterations);
+    RandomDenseStream<Ring> s1 (R, gen, n, iterations);
 
 	typedef DixonSolver<Ring, Givaro::Modular<int32_t>, PrimeIterator<IteratorCategories::HeuristicTag> > Solver;
         // typedef DixonSolver<Ring, Givaro::Modular<double>, LinBox::PrimeIterator<IteratorCategories::HeuristicTag> > Solver;
@@ -219,7 +219,7 @@ int main(int argc, char** argv)
 	if (!testRandom(R, lif, s1)) pass = false;
 
 	commentator().stop("Last invariant factor test suite");
-        return pass ? 0 : -1;
+    return pass ? 0 : -1;
 }
 
 // Local Variables:

From 4c47717bc69fd196cfbc71049b07a56ba3f081d9 Mon Sep 17 00:00:00 2001
From: Jean-Guillaume Dumas <Jean-Guillaume.Dumas@imag.fr>
Date: Wed, 28 Aug 2019 21:28:55 +0200
Subject: [PATCH 59/63] improve test possibilities

---
 linbox/algorithms/last-invariant-factor.h     |  3 ++
 .../rational-cra-builder-full-multip.h        | 18 +++++++-
 tests/test-last-invariant-factor.C            | 41 +++++++++++--------
 3 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/linbox/algorithms/last-invariant-factor.h b/linbox/algorithms/last-invariant-factor.h
index b3cbd9b3c..2c3315cb5 100644
--- a/linbox/algorithms/last-invariant-factor.h
+++ b/linbox/algorithms/last-invariant-factor.h
@@ -124,6 +124,9 @@ namespace LinBox
 
 				// try to solve Ax = b over Ring
 				tmp = solver.solveNonsingular(r_num, r_den, A, b);
+
+                // std::clog << "r_den: " << r_den << std::endl;
+
 				// If no solution found
 				if (tmp != SS_OK) {
 					r.assign (lif, r.zero);
diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h
index f5dc17f92..0e1c68b5a 100644
--- a/linbox/algorithms/rational-cra-builder-full-multip.h
+++ b/linbox/algorithms/rational-cra-builder-full-multip.h
@@ -92,8 +92,24 @@ namespace LinBox
 	protected:
 		Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn)
 		{
+/*            std::clog << "iterativeratrecon"
+                      << ", u1: " << u1
+                      << ", new_den: " << new_den 
+                      << ", old_den: " << old_den
+                      << ", m1: " << m1
+                      << ", sn: " << sn
+                      ;
+*/
 			Integer a;
-			_ZZ.RationalReconstruction(a, new_den, u1*=old_den, m1, sn);
+			bool success = _ZZ.RationalReconstruction(a, new_den, u1*=old_den, m1, sn, true, false);
+            if (! success) 
+                std::cerr << " ***** RationalReconstruction FAILURE ***** ";
+/*
+            std::clog << ", AFTER"
+                      << ", a: " << a
+                      << ", new_den: " << new_den 
+                      << std::endl;
+*/
 			return u1=a;
 		}
 	};
diff --git a/tests/test-last-invariant-factor.C b/tests/test-last-invariant-factor.C
index d3a6e0fca..9205268f4 100644
--- a/tests/test-last-invariant-factor.C
+++ b/tests/test-last-invariant-factor.C
@@ -49,8 +49,8 @@
 
 using namespace LinBox;
 
-template <class Ring, class LIF, class Vector>
-bool testRandom(const Ring& R,
+template <class Ring, class RandIter, class LIF, class Vector>
+bool testRandom(const Ring& R, RandIter& gen,
                 LIF& lif,
                 LinBox::VectorStream<Vector>& stream1)
 {
@@ -77,7 +77,8 @@ bool testRandom(const Ring& R,
 
         commentator().startIteration ((unsigned)stream1.j ());
 
-        std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
+//         std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
+        std::ostream &report = std::clog;
 
         bool iter_passed = true;
 
@@ -87,7 +88,7 @@ bool testRandom(const Ring& R,
 		VD.write (report, d);
         report << endl;
 
-		BlasMatrix<Ring> D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n);
+		DenseMatrix<Ring> D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n);
 
 		int i, j;
 
@@ -100,15 +101,14 @@ bool testRandom(const Ring& R,
 
 			for (j = 0; j < i; ++ j) {
 
-				R.init(L[(size_t)i][(size_t)j], int64_t(rand() % 10));
-
-				R.init(U[(size_t)j][(size_t)i], int64_t(rand() % 10));
+				gen.random(L[(size_t)i][(size_t)j]);
+                gen.random(U[(size_t)j][(size_t)i]);
 			}
 
 
-		BlasVector<Ring> tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n);
+		DenseVector<Ring> tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n);
 
-		typename BlasMatrix<Ring>::ColIterator col_p;
+		typename DenseMatrix<Ring>::ColIterator col_p;
 
 		i = 0;
 		for (col_p = A.colBegin();
@@ -117,8 +117,6 @@ bool testRandom(const Ring& R,
 			R.assign(e[(size_t)i],R.one);
 			U.apply(tmp1, e);
 			D.apply(tmp2, tmp1);
-                // LinBox::BlasSubvector<BlasVector<Ring> > col_p_v(R,*col_p);
-                // L.apply(col_p_v, tmp2);
 			L.apply(*col_p, tmp2);
 			R.assign(e[(size_t)i],R.zero);
 		}
@@ -183,23 +181,30 @@ int main(int argc, char** argv)
 
 
     bool pass = true;
-
+    int seed = -1;
     static size_t n = 10;
+    static size_t bits = 30;
 
 	static unsigned int iterations = 1;
 
     static Argument args[] = {
         { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT,     &n },
+        { 'b', "-b B", "Set bit size to B.", TYPE_INT,     &bits },
         { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT,     &iterations },
+        {'s', "-s", "Seed for randomness.", TYPE_INT, &seed},
 		END_OF_ARGUMENTS
 
     };
 
 	parseArguments (argc, argv, args);
 
+    if (seed < 0) {
+        seed = time(nullptr);
+    }
+
     typedef Givaro::ZRing<Integer>      Ring;
 
-    Ring R; Ring::RandIter gen(R);
+    Ring R; Ring::RandIter gen(R, seed, bits);
 
 	commentator().start("Last invariant factor test suite", "LIF");
 
@@ -214,11 +219,15 @@ int main(int argc, char** argv)
 
 	LIF lif;
 
-	lif.  setThreshold (30);
+	lif.setThreshold (30);
+
+	if (!testRandom(R, gen, lif, s1)) pass = false;
 
-	if (!testRandom(R, lif, s1)) pass = false;
+    if (!pass) {
+        std::cerr << "Failed with seed: " << seed << std::endl;
+    }
 
-	commentator().stop("Last invariant factor test suite");
+    commentator().stop("Last invariant factor test suite");
     return pass ? 0 : -1;
 }
 

From 99da8495fd713c6cb49b09f5c6c5b4f9224894fa Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Thu, 29 Aug 2019 17:11:08 +0200
Subject: [PATCH 60/63] Added a static_assert when MultiModLiftingContainer is
 used with anything other than Modular<double>

---
 linbox/algorithms/multi-mod-lifting-container.h | 2 ++
 tests/test-last-invariant-factor.C              | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 55df94d73..be9580584 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -68,6 +68,8 @@ namespace LinBox {
     public:
         using Ring = _Ring;
         using Field = _Field;
+        // @fixme Only Givaro::Modular<double> is handled for now; any other Field is rejected at compile time.
+        static_assert(std::is_same<Field, Givaro::Modular<double>>::value, "MultiModLifting requires Modular<double>.");
         using PrimeGenerator = _PrimeGenerator;
 
         using RNSSystem = FFPACK::rns_double;
diff --git a/tests/test-last-invariant-factor.C b/tests/test-last-invariant-factor.C
index 9205268f4..8667f5d02 100644
--- a/tests/test-last-invariant-factor.C
+++ b/tests/test-last-invariant-factor.C
@@ -212,8 +212,8 @@ int main(int argc, char** argv)
 
     RandomDenseStream<Ring> s1 (R, gen, n, iterations);
 
-	typedef DixonSolver<Ring, Givaro::Modular<int32_t>, PrimeIterator<IteratorCategories::HeuristicTag> > Solver;
-        // typedef DixonSolver<Ring, Givaro::Modular<double>, LinBox::PrimeIterator<IteratorCategories::HeuristicTag> > Solver;
+	// typedef DixonSolver<Ring, Givaro::Modular<int32_t>, PrimeIterator<IteratorCategories::HeuristicTag> > Solver;
+	typedef DixonSolver<Ring, Givaro::Modular<double>, LinBox::PrimeIterator<IteratorCategories::HeuristicTag> > Solver;
 
 	typedef LastInvariantFactor<Ring, Solver> LIF;
 

From fa4911fb6d910dbfdcd512d0c9eb15039d92def2 Mon Sep 17 00:00:00 2001
From: Alexis Breust <alexis.breust@gmail.com>
Date: Fri, 30 Aug 2019 11:34:46 +0200
Subject: [PATCH 61/63] Fixed HadamardBound bug

---
 linbox/algorithms/multi-mod-lifting-container.h |  7 ++-----
 linbox/solutions/hadamard-bound.h               | 11 ++++++++---
 tests/test-hadamard-bound.C                     | 12 ++++++------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index be9580584..399081894 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -337,13 +337,10 @@ namespace LinBox {
             {
                 numthreads = NUM_THREADS;
 
-                // @fixme @zhuh Can't get that working with NUM_THREADS,
-                // any idea what makes it wrong?
-                // ./test-solve-full -n 1 -m 1 -b 50 -v -l
                 auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row,
                                    FFLAS::StrategyParameter::Threads);
                 int M = _primesCount;
-                FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), {
+                FOR1D(j, M, sp, {
                     auto pj = _primes[j];
                     auto& FR = _FR[j];
                     uint64_t upj = pj;
@@ -360,8 +357,8 @@ namespace LinBox {
                     }
 
                     // digit = A^{-1} * R mod pj
+                    const auto& B = _B[j];
                     auto& digit = digits[j];
-                    auto& B = _B[j];
                     B.apply(digit, FR);
 
                     // Store the very same result in an RNS system,
diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h
index ba00bc071..43d25fe23 100644
--- a/linbox/solutions/hadamard-bound.h
+++ b/linbox/solutions/hadamard-bound.h
@@ -249,6 +249,7 @@ namespace LinBox {
         Integer rowBound;
         HadamardRowBound(rowBound, A);
 #ifdef DEBUG_HADAMARD_BOUND
+        A.write(std::clog) << std::endl;
         std::clog << "rowBound:=" << rowBound << ';' << std::endl;
 #endif
 
@@ -283,15 +284,19 @@ namespace LinBox {
     /**
      * Precise Hadamard bound (bound on determinant) by taking the minimum
      * of the column-wise and the row-wise euclidean norm.
-     *
-     * The result is expressed as bit size.
      */
     template <class IMatrix>
-    double HadamardBound(const IMatrix& A)
+    Integer HadamardBound(const IMatrix& A)
     {
         return DetailedHadamardBound(A).bound;
     }
 
+    template <class IMatrix>
+    double HadamardLogBound(const IMatrix& A)
+    {
+        return Givaro::logtwo(HadamardBound(A));
+    }
+
     // ----- Fast Hadamard bound
 
     template <class IMatrix>
diff --git a/tests/test-hadamard-bound.C b/tests/test-hadamard-bound.C
index 8e7a602f8..6487c7b70 100644
--- a/tests/test-hadamard-bound.C
+++ b/tests/test-hadamard-bound.C
@@ -42,8 +42,8 @@ bool test(const Ring& F, const TMatrix& A, const TVector& b)
     // ---- Determinant
 
     // Compute the bounds
-    double hb = HadamardBound(A);
-    double fastHb = FastHadamardBound(A);
+    double hb = HadamardLogBound(A);
+    double fastHb = FastHadamardLogBound(A);
 
     // Compute the effective determinant
     Integer detA;
@@ -73,17 +73,17 @@ bool test(const Ring& F, const TMatrix& A, const TVector& b)
     solve(num, den, A, b);
 
     for (size_t i = 0u; i < num.size(); ++i) {
-        if (Givaro::logtwo(Givaro::abs(num[i])) > rationalSolveHb.numLogBound + ESPILON) {
+        if (Givaro::abs(num[i]) > rationalSolveHb.numBound) {
             std::cerr << "The rational solve Hadamard bound does not bound the numerator." << std::endl;
-            std::cerr << "num[i]: " << Givaro::logtwo(Givaro::abs(num[i])) << " > " << rationalSolveHb.numLogBound
+            std::cerr << "num[i]: " << Givaro::abs(num[i]) << " > " << rationalSolveHb.numBound
                     << std::endl;
             return false;
         }
     }
 
-    if (Givaro::logtwo(Givaro::abs(den)) > rationalSolveHb.denLogBound + ESPILON) {
+    if (Givaro::abs(den) > rationalSolveHb.denBound) {
         std::cerr << "The rational solve Hadamard bound does not bound the denominator." << std::endl;
-        std::cerr << "den: " << Givaro::logtwo(den) << " > " << rationalSolveHb.denLogBound << std::endl;
+        std::cerr << "den: " << den << " > " << rationalSolveHb.denBound << std::endl;
         return false;
     }
 

From b62610c209605c96ac8fdd844d1dad1699ff4080 Mon Sep 17 00:00:00 2001
From: Jean-Guillaume Dumas <jgdumas@users.noreply.github.com>
Date: Wed, 4 Mar 2020 19:20:46 +0100
Subject: [PATCH 62/63] multi-mod-lifting-container.h: fix power index (j, not i) in algorithm comment

---
 linbox/algorithms/multi-mod-lifting-container.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index bd09766b4..2eb6ed9d1 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -48,7 +48,7 @@ namespace LinBox {
      *                  |   for i = 1 .. l:
      *                  |   |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
      *                  |   |   ci = Bi ri mod pi                   < Matrix-vector in Z/pZ
-     *                  |   |   yi = yi + ci * pi^(i-1)             < Done over ZZ
+     *                  |   |   yi = yi + ci * pi^(j-1)             < Done over ZZ
      *                  |   V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
      *                  |   for i = 1 .. l:
      *                  |   |   ri = Qi + (Vi / pi)

From 4a5a774f7d6a31e9bce253f857ccaa0070476fce Mon Sep 17 00:00:00 2001
From: Romain Lebreton <romainlebreton@users.noreply.github.com>
Date: Thu, 5 Mar 2020 15:18:40 +0100
Subject: [PATCH 63/63] multi-mod-lifting-container.h: apply Bi to residue Ri (not ri) in algorithm comment

---
 linbox/algorithms/multi-mod-lifting-container.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h
index 2eb6ed9d1..545160804 100644
--- a/linbox/algorithms/multi-mod-lifting-container.h
+++ b/linbox/algorithms/multi-mod-lifting-container.h
@@ -47,7 +47,7 @@ namespace LinBox {
      *                  for j = 1 .. k:
      *                  |   for i = 1 .. l:
      *                  |   |   (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi
-     *                  |   |   ci = Bi ri mod pi                   < Matrix-vector in Z/pZ
+     *                  |   |   ci = Bi Ri mod pi                   < Matrix-vector in Z/pZ
      *                  |   |   yi = yi + ci * pi^(j-1)             < Done over ZZ
      *                  |   V = [R1|...|Rl] - A [c1|...|cl]         < Matrix-matrix in ZZ
      *                  |   for i = 1 .. l: